Initial commit: SIC harness (backend, web, pi-adapter, configs, docs)

- pnpm monorepo: apps/api (Fastify + SQLite + SSE), apps/web (React+Vite), packages/shared, packages/pi-adapter - Local auth (admin/webhook-runner roles) + Keycloak JWT ready - Multi-session chat with reliable history (user persisted before LLM, assistant persisted after stream) - Markdown knowledge base with /api/docs/search + /api/docs/:id - YAML webhook catalog with backend-only execution, retry/backoff, audit (webhook_runs), and per-user rate limit - Skills config (sre-on-call, blameless-postmortem, security-incident) injected into LLM system prompt - LLM provider failover chain (config/models.yml fallback + LLM_FALLBACK_CHAIN override) - Context-aware webhooks panel + backend id-mention safety net - Per-message stats (time/duration/tokens/model), Markdown+GFM render, code & table copy/download buttons - Vitest suite, end-to-end smoke test (scripts/smoke.mjs), per-session system prompt override - /metrics Prometheus endpoint + /api/metrics JSON, request-id correlation - dotenv with explicit repo-root path; envString/envNumber helpers (handles empty-string env) - Runbooks + SOPs under knowledge/ in English; README, docs, and INDEX.md in English
2026-06-29 16:20:53 +02:00
commit 62728b2200
89 changed files with 11992 additions and 0 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,118 @@
 # =============================================================
 # SIC — Super Incident Commander
 # Local development environment.
 #
 # How to use:
 #   1. Copy this file to `.env`:
 #        cp .env.example .env
 #   2. Edit `.env` and fill in the secrets (at minimum MINIMAX_API_KEY).
 #   3. Start the API:
 #        pnpm dev
 #
 # The API loads `.env` automatically via dotenv at boot. Real shell
 # environment variables always win over the file, so production /
 # docker setups that inject env vars keep working unchanged.
 #
 # Never commit a real `.env` file — it's gitignored.
 # =============================================================
 # ---------------------------------------------------------------
 # Server
 # ---------------------------------------------------------------
 API_PORT=8787
 HOST=0.0.0.0
 API_BODY_LIMIT_BYTES=1048576
 CORS_ALLOWED_ORIGINS=
 WEB_PORT=3000
 WEB_VITE_API_PROXY=http://localhost:8787
 # ---------------------------------------------------------------
 # Auth
 # ---------------------------------------------------------------
 # local: dev mode, returns a synthetic `local-user` with admin + webhook-runner roles
 # keycloak: validates Authorization: Bearer JWT against OIDC_ISSUER/OIDC_AUDIENCE
 AUTH_MODE=local
 OIDC_ISSUER=https://auth.rikrdo.com/realms/homelab
 OIDC_AUDIENCE=pi-chat
 # ---------------------------------------------------------------
 # Persistence
 # ---------------------------------------------------------------
 DATABASE_URL=sqlite://./data/pi-chat.db
 # ---------------------------------------------------------------
 # LLM provider (OpenAI-compatible)
 # ---------------------------------------------------------------
 # Default provider and base URL. The MiniMax and mr-auto model
 # entries in config/models.yml both read these.
 LLM_BASE_URL=https://api.minimax.io/v1
 # Per-model fallback chain. Override the default chain parsed from
 # config/models.yml. Comma-separated model ids in the order to try.
 # Set to empty to disable and use the YAML-only chain.
 # Example: LLM_FALLBACK_CHAIN=balanced,mr-auto
 LLM_FALLBACK_CHAIN=
 LLM_API_KEY=
 DEFAULT_MODEL=fast
 # Backwards-compat alias for the MiniMax key. Either this or LLM_API_KEY works.
 # Used by chat routes as a fallback when LLM_API_KEY is empty.
 MINIMAX_API_KEY=
 # Per-model API key overrides (config/models.yml -> model.api_key_env).
 # Only the mr-auto model needs this; MiniMax shares LLM_API_KEY.
 MR_AUTO_API_KEY=
 LLM_TIMEOUT_MS=30000
 # ---------------------------------------------------------------
 # Chat input limits
 # ---------------------------------------------------------------
 CHAT_MESSAGE_MAX_CHARS=8000
 # ---------------------------------------------------------------
 # Rate limits
 # ---------------------------------------------------------------
 # /api/chat/stream — per authenticated user
 CHAT_RATE_LIMIT_PER_MINUTE=20
 CHAT_RATE_LIMIT_BURST=5
 # POST /api/webhooks/:id/run — per webhook id (across all users)
 WEBHOOK_RATE_LIMIT_PER_MINUTE=60
 WEBHOOK_RATE_LIMIT_BURST=10
 # ---------------------------------------------------------------
 # Webhook execution
 # ---------------------------------------------------------------
 WEBHOOK_TIMEOUT_MS=15000
 WEBHOOK_RETRY_MAX_ATTEMPTS=3
 WEBHOOK_RETRY_INITIAL_BACKOFF_MS=500
 WEBHOOK_RETRY_MAX_BACKOFF_MS=5000
 # Audit retention
 WEBHOOK_RUNS_RETENTION_DAYS=30
 WEBHOOK_RUNS_MAX_PER_USER=1000
 WEBHOOK_AUDIT_PURGE_INTERVAL_MS=3600000
 # Webhook usage stats window (days) for /api/webhooks/usage
 WEBHOOK_USAGE_WINDOW_DAYS=7
 # ---------------------------------------------------------------
 # Config file paths
 # ---------------------------------------------------------------
 # Each config file can be overridden with an env var. Defaults
 # resolve relative to apps/api (where the API is started).
 MODELS_CONFIG_PATH=
 WEBHOOKS_CONFIG_PATH=
 SKILLS_CONFIG_PATH=
 KNOWLEDGE_DIR=
 N8N_CONFIG_PATH=
 RAG_CONFIG_PATH=
 MCP_CONFIG_PATH=
 # ---------------------------------------------------------------
 # Development
 # ---------------------------------------------------------------
 # Set to `1` to enable verbose Fastify logging.
 DEBUG=0
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,39 @@
 node_modules/
 dist/
 .env
 .env.local
 # SQLite database files and their sidecar files (e.g. -wal / -shm, created in WAL mode).
 data/*.db
 data/*.db-*
 **/data/*.db
 **/data/*.db-*
 # NOTE(review): the next two patterns are already covered by `**/data/*.db-*` above.
 **/data/*.db-shm
 **/data/*.db-wal
 *.log
 # Local agent / codebase caches
 .atl/
 .codebase-memory/
 # NOTE(review): `scripts/*.mjs` ignores every .mjs file directly under scripts/, and the
 # negation below re-includes nothing, because a `*.mjs` pattern never matches a name
 # ending in `.mjs.bak`. Confirm whether the intent was the reverse (track the scripts,
 # ignore the backups) and whether INDEX.md is really meant to be untracked.
 INDEX.md
 scripts/*.mjs
 !scripts/*.mjs.bak
 # Editor / OS
 .DS_Store
 .idea/
 .vscode/
 # Build artifacts
 apps/api/dist/
 apps/web/dist/
 packages/*/dist/
 # Logs
 logs/
 # NOTE(review): `*.log` is already listed earlier in this file.
 *.log
 *.pid
 *.seed
 *.pid.lock
 # Coverage
 coverage/
 .nyc_output/
--- a/README.md
+++ b/README.md
@@ -0,0 +1,273 @@
 # SIC — Super Incident Commander
 Lightweight web harness to use a centralized `pi.dev` engine from the browser, with independent sessions, reliable history in SQLite, internal Markdown documentation, and webhooks executed only from the backend after explicit user confirmation.
 ## MVP scope
 - Expected ceiling: 5 concurrent users.
 - Frontend: React + Vite.
 - Backend: Node.js + Fastify.
 - Initial persistence: SQLite.
 - LLM: OpenAI-compatible endpoint via `pi-adapter`.
 - Default LLM provider: MiniMax OpenAI-compatible.
 - Configuration: YAML + environment variables.
 - Initial deploy: Docker Compose.
 ## Reliability principle
 Nothing critical lives only in memory. Sessions, messages, and webhook audit are rebuilt from SQLite.
 Every conversation read/write must respect:
 ```sql
 WHERE session_id = ?
 AND user_id = ?
 ```
 ## Structure
 ```text
 apps/
  api/                  # Fastify API, SSE, sessions, webhooks, docs
  web/                  # React + Vite UI
 packages/
  shared/               # Shared types
  pi-adapter/           # pi.dev / OpenAI-compatible adapter
 config/                 # YAML for models, webhooks and docs
 knowledge/              # Internal Markdown documentation
 deploy/                 # Docker Compose and future manifests
 docs/                   # Definitions, reliable history and agents
 scripts/                # End-to-end smoke test + mock LLM
 ```
 ## API surface
 - `GET /healthz`
 - `GET /readyz`
 - `GET /api/version`
 - `GET /api/me`
 - `GET /api/sessions`
 - `POST /api/sessions`
 - `GET /api/sessions/:id`
 - `PATCH /api/sessions/:id`
 - `DELETE /api/sessions/:id`
 - `GET /api/docs/search?q=vpn`
 - `GET /api/docs/:id`
 - `GET /api/models`
 - `GET /api/webhooks`
 - `GET /api/webhook-runs?sessionId=...`
 - `POST /api/webhooks/:id/run`
 - `GET /api/skills`
 - `PATCH /api/sessions/:id/system-prompt` — set per-session context
 - `GET /metrics` — Prometheus text
 - `GET /api/metrics` — same data as JSON
 - `POST /api/chat/stream`
 ## Chat stream contract
 `POST /api/chat/stream` takes `sessionId`, `message` and optionally `model`.
 Reliability rules:
 1. Validate that the session belongs to the current user.
 2. Persist the `user` message before calling the LLM.
 3. If the session has no title yet, derive a short one from the first message.
 4. Validate the requested model against `config/models.yml`.
 5. Search relevant Markdown docs and role-allowed webhooks.
 6. Call the OpenAI-compatible endpoint via `pi-adapter`. If the model has a fallback chain, the chat route walks it on structured or transport errors; the first `ok=true` response wins.
 7. Emit SSE events: `docs`, `token`, `actions`, `done`. If the model call fails, an `error` event is emitted before `done`.
 8. Persist the `assistant` response; if every model in the chain fails, persist a controlled message with error metadata and the full failure trail.
 ### Provider fallback
 Each model in `config/models.yml` can declare `fallback: [other-id, ...]`. The chat route walks the chain when a model returns `ok=false` (no_content / json_parse / schema) or throws (5xx / 429 / network / timeout). When the assistant metadata is persisted, it includes `requested_model`, `fallback_attempts`, `fallback_chain`, and `fallback_failures` whenever the chain was actually used, so you can see what happened in the chat history.
 Override the chain globally with `LLM_FALLBACK_CHAIN` (comma-separated model ids, tried in that order after the requested model). Leave empty to use each model's YAML chain.
 Default chain today (from `config/models.yml`):
 - `fast` → no fallback (it IS the cheap path)
 - `balanced` → `mr-auto`
 - `reasoning` → no fallback
 - `mr-auto` → no fallback
 ## MiniMax
 The project is wired to MiniMax via the official OpenAI-compatible endpoint:
 - Base URL: `https://api.minimax.io/v1`
 - Chat path used by the adapter: `/chat/completions`
 - Auth: `Authorization: Bearer <key>`
 Models configured in `config/models.yml`:
 - `fast` → `MiniMax-M2.7-highspeed`
 - `balanced` → `MiniMax-M2.7`
 - `reasoning` → `MiniMax-M3`
 To run locally, set the key:
 ```bash
 export MINIMAX_API_KEY="your-key"
 export LLM_BASE_URL="https://api.minimax.io/v1"
 export LLM_API_KEY="$MINIMAX_API_KEY"
 export DEFAULT_MODEL="fast"
 ```
 In Docker Compose you only need to export `MINIMAX_API_KEY`; the Compose file maps it to `LLM_API_KEY`.
 ## UI MVP
 The React app already consumes the API through the Vite proxy:
 - Loads or creates a local session.
 - Loads `GET /api/models` and lets the user pick the model per message.
 - Lists persisted sessions and lets the user switch between them.
 - Lets the user rename and delete sessions, always through the API with per-user isolation.
 - Sends messages to `POST /api/chat/stream` and consumes SSE events.
 - Shows recommended documentation and lets the user open the full document via `GET /api/docs/:id`.
 - Shows suggested actions in the right panel.
 - Loads `GET /api/webhooks` to show public labels/descriptions for actions.
 - Executes webhooks only after user confirmation and always through the backend.
 - Shows execution audit per session from `GET /api/webhook-runs`, without exposing URLs or payload templates.
 - Can attach a development Bearer token to test `AUTH_MODE=keycloak`; reads from `localStorage` or `VITE_AUTH_TOKEN`.
 ## Skills
 Skills are persona/behavior prompt fragments loaded from `config/skills.yml` and injected into the LLM's system prompt at chat time. They are NOT capabilities: the model still only recommends actions and the backend still owns execution.
 Each skill has: `id`, `name`, `description`, `enabled`, `prompt`. Skills with `enabled: true` are injected into the chat system prompt (after the base identity prompt, before the docs/actions context). Skills with `enabled: false` are kept in the file but inactive. The frontend can list them via `GET /api/skills` (no prompt text is exposed publicly — only id, name, description, enabled).
 Edit `config/skills.yml` and restart the API to change the active skill set. The default file ships with `sre-on-call` and `blameless-postmortem` enabled; `security-incident` is shipped disabled as a reference.
 The env var `SKILLS_CONFIG_PATH` overrides the default config path (`../../config/skills.yml` relative to `cwd`).
 ## Per-session context
 Every session has an optional `system_prompt` field. When set, it is prepended to every chat turn as a system message (after the base identity prompt and skill prompts, before the docs/actions context). Use it to pin incident id, on-call name, or runbook references that shouldn't drift across the conversation.
 - **Frontend**: each session row has a small circle button (`○` empty, `●` set). Click it to open a modal editor with Save and Clear.
 - **API**: `PATCH /api/sessions/:id/system-prompt` with `{ "system_prompt": "..." }`. Send `null` or empty string to clear.
 - **Limit**: 8000 characters.
 - **Persistence**: stored in `chat_sessions.system_prompt`; same `WHERE id = ? AND user_id = ?` ownership rule as every other session operation.
 ## Observability
 Two endpoints surface API metrics:
 - `GET /metrics` — Prometheus text exposition (counter / summary), scraper-friendly. It is served by the API itself, so point the Prometheus scrape target at the API port (`API_PORT`).
 - `GET /api/metrics` — same data as JSON for humans and the smoke test. Shape:
  ```json
  {
    "started_at": "2026-06-29T12:00:00.000Z",
    "uptime_seconds": 1234,
    "totals": { "requests": 5678, "errors_5xx": 0 },
    "routes": [
      {
        "route": "/api/chat/stream",
        "method": "POST",
        "count": 42,
        "avg_ms": 1230,
        "p95_ms": 4500,
        "max_ms": 8000,
        "status_buckets": { "200_299": 42 }
      }
    ],
    "recent": [
      {
        "route": "/api/sessions/:id",
        "method": "DELETE",
        "status": 204,
        "durationMs": 4,
        "timestamp": 1782727300000
      }
    ]
  }
  ```
 Routes are aggregated by route **template** (e.g. `/api/sessions/:id`), not by raw URL, so `/api/sessions/abc` and `/api/sessions/def` share a bucket. p95 uses a fixed-size streaming reservoir (200 samples) so memory stays bounded under traffic. In-memory only — counters reset on restart; that's the expected behavior for a 5-user MVP.
 ## Auth
 The backend supports two modes:
 - `AUTH_MODE=local`: dev mode, uses `local-user` with roles `admin` and `webhook-runner`.
 - `AUTH_MODE=keycloak`: validates `Authorization: Bearer <token>` with remote JWKS from `OIDC_ISSUER` and `OIDC_AUDIENCE`.
 For manual Keycloak testing, the UI lets you paste a JWT in the "Dev token" box. That token is stored in `localStorage` and sent as `Authorization: Bearer <token>` on API and stream calls. Alternatively, Vite can receive `VITE_AUTH_TOKEN` to preconfigure it for the local environment.
 Claims used from Keycloak:
 - `sub` as `user.id`.
 - `preferred_username` and `email` for display.
 - Roles from `realm_access.roles` and `resource_access[OIDC_AUDIENCE].roles`.
 ## Basic hardening
 - `API_BODY_LIMIT_BYTES`: global Fastify body limit. Default: `1048576`.
 - `CHAT_MESSAGE_MAX_CHARS`: chat message and `lastUserMessage` limit on webhooks. Default: `8000`.
 - `CORS_ALLOWED_ORIGINS`: comma-separated list. If unset, open for dev.
 - `LLM_TIMEOUT_MS`: OpenAI-compatible call timeout. Default: `30000`.
 - `WEBHOOK_TIMEOUT_MS`: backend-only webhook execution timeout. Default: `15000`.
 - `WEBHOOK_RETRY_MAX_ATTEMPTS`: retries per webhook on transient errors (5xx, 429, timeout, network). Default: `3`.
 - `WEBHOOK_RETRY_INITIAL_BACKOFF_MS`: initial backoff with exponential growth. Default: `500`.
 - `WEBHOOK_RETRY_MAX_BACKOFF_MS`: backoff cap. Default: `5000`.
 - `WEBHOOK_RUNS_RETENTION_DAYS`: age cutoff for `webhook_runs` rows. Runs older than this are purged on boot and on a timer. Default: `30`. Set to `0` to disable the age pass.
 - `WEBHOOK_RUNS_MAX_PER_USER`: keep at most this many most-recent runs per user. The oldest overflow is purged. Default: `1000`. Set to `0` to disable the cap pass.
 - `WEBHOOK_AUDIT_PURGE_INTERVAL_MS`: how often the janitor runs while the API is up. Default: `3600000` (1 hour). Minimum: `60000` (1 minute).
 - `CHAT_RATE_LIMIT_PER_MINUTE`: per-user rate limit on `POST /api/chat/stream` (token-bucket refill rate). Default: `20`.
 - `CHAT_RATE_LIMIT_BURST`: per-user burst size. Default: `5`. Rejected calls return `429` with `retry-after` in seconds and `x-ratelimit-remaining: 0`.
 - The API adds basic defensive headers: `x-content-type-options`, `referrer-policy`, `x-frame-options`.
 ## End-to-end smoke test
 A smoke script exercises the full API (health, auth, models, docs, webhooks, sessions, SSE stream, message persistence and audit).
 ### With a real LLM (MiniMax)
 ```bash
 # Terminal 1: start the API and the web
 export LLM_BASE_URL=https://api.minimax.io/v1
 export LLM_API_KEY="$MINIMAX_API_KEY"
 export DEFAULT_MODEL=fast
 pnpm dev
 # Terminal 2: smoke test against http://localhost:3000
 pnpm smoke
 ```
 ### With the mock LLM (no key)
 ```bash
 # Terminal 1: start the API and the web pointing at the mock
 pnpm mock:llm &
 export LLM_BASE_URL=http://127.0.0.1:4010/v1
 export LLM_API_KEY=dummy
 export DEFAULT_MODEL=fast
 pnpm dev
 # Terminal 2
 pnpm smoke
 # or in a single step, the script starts the mock internally:
 pnpm smoke:mock
 ```
 Steps covered (in order):
 1. `/healthz`, `/readyz`
 2. `/api/me` (local auth)
 3. `/api/models`
 4. `/api/docs/search` + `/api/docs/:id`
 5. `/api/webhooks`
 6. `POST /api/sessions` + `GET /api/sessions`
 7. `POST /api/chat/stream` and SSE event parsing (`docs`, `token`, `actions`, `done`)
 8. `GET /api/sessions/:id` to confirm the assistant message was persisted
 9. `GET /api/webhook-runs?sessionId=...` to confirm audit listing
 10. `DELETE /api/sessions/:id` (cleanup)
 Optional flags:
 - `pnpm smoke --api-base http://localhost:4000` to point at a different API
 - `pnpm smoke:mock` (alias of `pnpm smoke --mock-llm`) starts the mock inside the script
--- a/apps/api/package.json
+++ b/apps/api/package.json
@@ -0,0 +1,28 @@
 {
  "name": "@pi-chat/api",
  "private": true,
  "version": "0.1.0",
  "type": "module",
  "scripts": {
    "dev": "tsx watch src/server.ts",
    "typecheck": "tsc --noEmit",
    "lint": "tsc --noEmit"
  },
  "dependencies": {
    "@fastify/cors": "^11.0.1",
    "@pi-chat/pi-adapter": "workspace:*",
    "@pi-chat/shared": "workspace:*",
    "better-sqlite3": "^11.10.0",
    "dotenv": "^17.4.2",
    "fastify": "^5.3.3",
    "jose": "^5.10.0",
    "yaml": "^2.7.1",
    "zod": "^3.25.67"
  },
  "devDependencies": {
    "@types/better-sqlite3": "^7.6.13",
    "@types/node": "^22.15.32",
    "tsx": "^4.20.3",
    "typescript": "^5.8.3"
  }
 }
--- a/apps/api/src/auth/index.ts
+++ b/apps/api/src/auth/index.ts
@@ -0,0 +1,76 @@
 import type { AuthUser } from "@pi-chat/shared";
 import type { FastifyRequest } from "fastify";
 import { createRemoteJWKSet, jwtVerify } from "jose";
 import { envString } from "../env.js";
 /**
  * The JWT claims this module reads from a verified token.
  * Every field is optional: the payload is only cast to this shape, never validated against it.
  */
 interface KeycloakClaims {
  /** Subject claim; becomes the user id. */
  sub?: string;
  /** Display name. */
  preferred_username?: string;
  /** E-mail address. */
  email?: string;
  /** Realm-level roles. */
  realm_access?: { roles?: string[] };
  /** Per-client roles, keyed by client id. */
  resource_access?: Record<string, { roles?: string[] }>;
 }
 // Auth settings are read from the environment on every call rather than captured at import time.
 // NOTE(review): the issuer falls back to a hard-coded external realm when OIDC_ISSUER is not
 // set — confirm that is intended for keycloak mode.
 function authMode() {
  return envString(process.env.AUTH_MODE, "local");
 }
 function oidcIssuer() {
  return envString(process.env.OIDC_ISSUER, "https://auth.rikrdo.com/realms/homelab");
 }
 function oidcAudience() {
  return envString(process.env.OIDC_AUDIENCE, "pi-chat");
 }
 // Remote JWKS handle; stays undefined until the first keycloak verification creates it.
 let jwks: ReturnType<typeof createRemoteJWKSet> | undefined;
 /** Builds the synthetic `local-user` identity, which carries both the admin and webhook-runner roles. */
 function getLocalUser(): AuthUser {
  return {
    id: "local-user",
    username: "local-user",
    roles: ["admin", "webhook-runner"],
  };
 }
 /**
  * Extracts the bearer token from the request's `Authorization` header.
  *
  * The scheme name is matched case-insensitively (HTTP auth schemes are case-insensitive),
  * and a header that carries the scheme but no token is rejected here instead of being
  * handed to the JWT verifier as an empty string.
  *
  * @param request Incoming Fastify request.
  * @returns The token with surrounding whitespace removed.
  * @throws Error("auth_missing_bearer_token") when the header is absent, uses another
  *         scheme, or contains no token.
  */
 const bearerTokenFrom = (request: FastifyRequest) => {
  const header = request.headers.authorization;
  const match = header ? /^Bearer +(.+)$/i.exec(header) : null;
  const token = match?.[1]?.trim() ?? "";
  if (token === "") {
    throw new Error("auth_missing_bearer_token");
  }
  return token;
 };
 /**
  * Merges realm roles with the roles granted for the configured audience's client entry,
  * dropping duplicates. Realm roles come first; first-seen order is kept.
  */
 const rolesFromClaims = (claims: KeycloakClaims) => {
  const clientId = oidcAudience();
  const unique = new Set<string>(claims.realm_access?.roles ?? []);
  for (const role of claims.resource_access?.[clientId]?.roles ?? []) {
    unique.add(role);
  }
  return Array.from(unique);
 };
 /**
  * Verifies the request's bearer JWT against the configured issuer and audience and maps its
  * claims to an AuthUser.
  *
  * @throws Error("auth_missing_subject") when the verified token has no `sub` claim; errors
  *         from token extraction or verification propagate unchanged.
  */
 async function getKeycloakUser(request: FastifyRequest): Promise<AuthUser> {
  const issuer = oidcIssuer();
  const audience = oidcAudience();
  // Create the remote key set on first use and keep it for later requests.
  let keySet = jwks;
  if (keySet === undefined) {
    keySet = createRemoteJWKSet(new URL(`${issuer}/protocol/openid-connect/certs`));
    jwks = keySet;
  }
  const token = bearerTokenFrom(request);
  const verified = await jwtVerify(token, keySet, { issuer, audience });
  const claims = verified.payload as KeycloakClaims;
  const { sub, preferred_username: username, email } = claims;
  if (!sub) {
    throw new Error("auth_missing_subject");
  }
  return { id: sub, username, email, roles: rolesFromClaims(claims) };
 }
 /**
  * Resolves the current user according to the configured auth mode.
  *
  * @returns The synthetic local user in "local" mode, or the user derived from the verified
  *          bearer token in "keycloak" mode.
  * @throws Error(`auth_mode_not_supported:<mode>`) for any other mode.
  */
 export const getAuthUser = async (request: FastifyRequest): Promise<AuthUser> => {
  const mode = authMode();
  switch (mode) {
    case "local":
      return getLocalUser();
    case "keycloak":
      return getKeycloakUser(request);
    default:
      throw new Error(`auth_mode_not_supported:${mode}`);
  }
 };
--- a/apps/api/src/auth/routes.ts
+++ b/apps/api/src/auth/routes.ts
@@ -0,0 +1,6 @@
 import type { FastifyInstance } from "fastify";
 import { getAuthUser } from "./index.js";
 /** Registers `GET /api/me`, which replies with the authenticated user wrapped as `{ user }`. */
 export const registerAuthRoutes = async (app: FastifyInstance) => {
  app.get("/api/me", async (request) => {
    const user = await getAuthUser(request);
    return { user };
  });
 };
--- a/apps/api/src/chat/routes.ts
+++ b/apps/api/src/chat/routes.ts
@@ -0,0 +1,355 @@
 import { createOpenAICompatiblePiAdapter } from "@pi-chat/pi-adapter";
 import type { InternalDocReference, RecommendedAction } from "@pi-chat/shared";
 import type { FastifyInstance, FastifyReply } from "fastify";
 import { z } from "zod";
 import { getAuthUser } from "../auth/index.js";
 import type { AppDatabase } from "../db/database.js";
 import { createDocsRepository, type KnowledgeSearchResult } from "../docs/repository.js";
 import { findModelDefinition, getDefaultModelId, resolveFallbackChain, resolveModelApiKey } from "../models/config.js";
 import { envNumber } from "../env.js";
 import { chatRateLimiterFromEnv } from "../rate-limit.js";
 import { getEnabledSkillPrompts } from "../skills/config.js";
 import { createMessageRepository, createSessionRepository } from "../sessions/repository.js";
 import { createWebhookAuditRepository } from "../webhooks/audit.js";
 import { canUseWebhook, loadWebhookDefinitions } from "../webhooks/config.js";
 // Upper bound for a chat message, read from the environment once when this module is evaluated.
 const maxChatMessageChars = envNumber(process.env.CHAT_MESSAGE_MAX_CHARS, 8_000);
 // Body schema for POST /api/chat/stream; `model` falls back to the default model id.
 const chatStreamBody = z.object({
  sessionId: z.string().min(1),
  message: z.string().trim().min(1).max(maxChatMessageChars),
  model: z.string().trim().default(getDefaultModelId()),
 });
 /** Writes one SSE `data:` frame holding the JSON-serialized event to the raw response. */
 function sendEvent(reply: FastifyReply, event: unknown): void {
  const frame = "data: " + JSON.stringify(event) + "\n\n";
  reply.raw.write(frame);
 }
 /**
  * Streams the answer as a sequence of `token` events, one per word together with the
  * whitespace that follows it. An answer with no non-whitespace content emits nothing.
  */
 function sendAnswerTokens(reply: FastifyReply, answer: string): void {
  const chunks = answer.match(/\S+\s*/g);
  if (!chunks) return;
  chunks.forEach((token) => {
    sendEvent(reply, { type: "token", token });
  });
 }
 /** Reduces a search result to the four reference fields: id, title, source and relevance. */
 const toDocReference = ({ id, title, source, relevance }: KnowledgeSearchResult): InternalDocReference => {
  return { id, title, source, relevance };
 };
 /** True when the title is missing, empty, or still the literal "New session". */
 function shouldAutoTitle(title: string | null): boolean {
  if (!title) return true;
  return title === "New session";
 }
 /**
  * Derives a session title from a message: whitespace runs collapse to single spaces, the
  * ends are trimmed, and anything longer than 48 characters is cut to 45 plus "...".
  */
 function titleFromMessage(message: string): string {
  const collapsed = message
    .split(/\s+/)
    .filter((word) => word.length > 0)
    .join(" ");
  if (collapsed.length <= 48) {
    return collapsed;
  }
  return collapsed.slice(0, 45) + "...";
 }
 // Match a webhook id against a chunk of text. Accepts the full id (with - or _)
 // OR its first word (e.g. id "dns-flush" matches "dns" in "flush the dns cache").
 // Both sides are normalized to lowercase and dashes/underscores → spaces.
 // The first word is matched as a whole word and only when it is at least 3 characters
 // long. It is regex-escaped before being compiled, so an id containing regex
 // metacharacters (".", "+", "(", ...) neither throws nor matches unrelated text.
 const matchesId = (text: string, id: string): boolean => {
  const normalize = (value: string) => value.toLowerCase().replace(/[-_]+/g, " ");
  const haystack = normalize(text);
  const needle = normalize(id);
  if (haystack.includes(needle)) return true;
  const firstWord = needle.split(" ")[0] ?? "";
  if (firstWord.length < 3) return false;
  const escaped = firstWord.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`\\b${escaped}\\b`, "i").test(haystack);
 };
 /**
  * Returns the recommended actions followed by every available action whose id is mentioned
  * in the user message or in the answer and is not already recommended.
  *
  * Added actions get confidence 0.4 and the reason "Mentioned in the conversation". The
  * input arrays are not modified.
  */
 const enrichActionsWithMentions = (
  userMessage: string,
  answer: string,
  recommended: RecommendedAction[],
  available: RecommendedAction[],
 ): RecommendedAction[] => {
  const knownIds = new Set(recommended.map(({ id }) => id));
  const synthesized: RecommendedAction[] = [];
  for (const candidate of available) {
    if (knownIds.has(candidate.id)) continue;
    const mentioned = matchesId(userMessage, candidate.id) || matchesId(answer, candidate.id);
    if (!mentioned) continue;
    knownIds.add(candidate.id);
    synthesized.push({
      ...candidate,
      // Fixed, lower confidence so these can be told apart from the adapter's own picks.
      confidence: 0.4,
      reason: "Mentioned in the conversation",
    });
  }
  return [...recommended, ...synthesized];
 };
 /**
  * Registers `POST /api/chat/stream`, the SSE chat endpoint.
  *
  * Per request the handler authenticates the caller, applies the per-user rate limit,
  * validates body / session / model, persists the user message, gathers docs and
  * role-allowed webhooks, walks the model fallback chain, streams `docs` / `token` /
  * `actions` / `done` events and persists the assistant message. When the model call fails
  * it persists and streams a controlled error message instead.
  *
  * @param app Fastify instance the route is registered on.
  * @param db  Database handle used for the session, message and webhook-audit repositories.
  */
 export const registerChatRoutes = async (app: FastifyInstance, db: AppDatabase) => {
  const sessions = createSessionRepository(db);
  const messages = createMessageRepository(db);
  const docs = createDocsRepository();
  const audit = createWebhookAuditRepository(db);
  const rateLimiter = chatRateLimiterFromEnv();
  app.post("/api/chat/stream", async (request, reply) => {
    const user = await getAuthUser(request);
    const decision = rateLimiter.consume(user.id);
    if (!decision.ok) {
      const retryAfterSec = Math.max(1, Math.ceil(decision.retryAfterMs / 1000));
      app.log.warn({ user: user.id, retryAfterSec }, "chat rate limit exceeded");
      return reply
        .code(429)
        .header("retry-after", String(retryAfterSec))
        .header("x-ratelimit-remaining", "0")
        .send({
          error: "rate_limited",
          retry_after_ms: decision.retryAfterMs,
        });
    }
    reply.header("x-ratelimit-remaining", String(decision.remaining));
    const body = chatStreamBody.parse(request.body);
    const session = sessions.get(user.id, body.sessionId);
    const selectedModel = findModelDefinition(body.model);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    if (!selectedModel) {
      return reply.code(400).send({ error: "model_not_found" });
    }
    // Only key lengths are logged, never the key itself.
    const resolvedKey = resolveModelApiKey(selectedModel);
    app.log.debug(
      {
        model: selectedModel.id,
        keyLen: resolvedKey.length,
        llmKeyLen: (process.env.LLM_API_KEY ?? "").length,
      },
      "llm api key resolved",
    );
    // Build the ordered fallback chain starting at the selected model. The
    // adapter is built fresh per model because base URL, key and provider
    // model name can differ across chain entries.
    const chain = resolveFallbackChain(selectedModel.id);
    const chainModels = chain
      .map((id) => findModelDefinition(id))
      .filter((m): m is NonNullable<ReturnType<typeof findModelDefinition>> => Boolean(m));
    const adapters = chainModels.map((model) => ({
      model,
      pi: createOpenAICompatiblePiAdapter({
        baseUrl: model.base_url || process.env.LLM_BASE_URL || "https://api.minimax.io/v1",
        apiKey: resolveModelApiKey(model),
        defaultModel: model.model,
        maxTokens: model.max_tokens,
      }),
    }));
    // The user message is persisted before any LLM call.
    const userMessage = messages.create({
      sessionId: body.sessionId,
      userId: user.id,
      role: "user",
      content: body.message,
      metadata: { model: selectedModel.id },
    });
    if (shouldAutoTitle(session.title)) {
      sessions.updateTitle(user.id, body.sessionId, titleFromMessage(body.message));
    }
    sessions.touch(user.id, body.sessionId);
    const docResults = await docs.search(body.message, 5);
    const internalDocs = docResults.map(toDocReference);
    // Soft usage signal: how often has this user run each webhook in the
    // recent past. Surfaced as `usageHint` so the LLM can prefer frequently
    // used webhooks when ambiguous, and so the UI can show a "Most used" tag.
    const usageSinceDays = Math.max(0, envNumber(process.env.WEBHOOK_USAGE_WINDOW_DAYS, 7));
    const usageSince = usageSinceDays > 0
      ? new Date(Date.now() - usageSinceDays * 86_400_000).toISOString()
      : new Date(0).toISOString();
    const usageMap = usageSinceDays > 0
      ? audit.usageForUserSince(usageSince, user.id)
      : {};
    const formatUsageHint = (webhookId: string): string | null => {
      const stats = usageMap[webhookId];
      if (!stats || stats.runs === 0) return null;
      const successPct = Math.round(stats.successRate * 100);
      return `${stats.runs} run${stats.runs === 1 ? "" : "s"} in last ${usageSinceDays}d, ${successPct}% success`;
    };
    const availableActions: RecommendedAction[] = loadWebhookDefinitions()
      .filter((webhook) => canUseWebhook(user.roles, webhook))
      .map((webhook) => {
        const usageHint = formatUsageHint(webhook.id);
        return {
          type: "webhook" as const,
          id: webhook.id,
          confidence: 0,
          reason: webhook.description ?? webhook.label,
          requires_confirmation: webhook.confirmation_required,
          ...(usageHint ? { usageHint } : {}),
        };
      });
    // Start the SSE response only once the preparatory work above has succeeded, so a
    // failure there can still be answered with a regular error response. Headers set
    // through `reply.header` (such as `x-ratelimit-remaining`) are not written by a raw
    // `writeHead`, so they are merged in explicitly.
    reply.raw.writeHead(200, {
      ...reply.getHeaders(),
      "content-type": "text/event-stream; charset=utf-8",
      "cache-control": "no-cache, no-transform",
      connection: "keep-alive",
    });
    sendEvent(reply, { type: "docs", docs: docResults });
    // Declared outside the try block so the error path can persist the failure trail too.
    const failures: Array<{ model: string; reason: string; kind?: string }> = [];
    try {
      // Up to the 12 most recent earlier messages, excluding the one just stored.
      const history = messages
        .listForSession(user.id, body.sessionId)
        .filter((message) => message.id !== userMessage.id)
        .slice(-12)
        .map((message) => ({ role: message.role, content: message.content }));
      const t0 = Date.now();
      // Walk the fallback chain. The first adapter that returns ok=true
      // wins. If a structured error comes back from any one model we move
      // to the next; an exception (network/5xx/timeout) also jumps chain.
      let chat: Awaited<ReturnType<typeof adapters[number]["pi"]["chat"]>> | null = null;
      let usedModelId = selectedModel.id;
      let fallbackAttempts = 0;
      for (const entry of adapters) {
        try {
          const result = await entry.pi.chat({
            message: body.message,
            model: entry.model.model,
            docs: internalDocs,
            availableActions,
            history,
            skillPrompts: getEnabledSkillPrompts(),
            systemPrompt: session.system_prompt,
          });
          if (result.ok) {
            chat = result;
            usedModelId = entry.model.id;
            break;
          }
          // Structured error (no_content / json_parse / schema). Try next.
          failures.push({
            model: entry.model.id,
            kind: result.error.kind,
            reason: result.error.kind === "no_content"
              ? result.error.message
              : result.error.reason,
          });
          fallbackAttempts += 1;
          chat = result; // keep last error for the controlled fallback path
          usedModelId = entry.model.id;
        } catch (error) {
          // Transport / timeout / 5xx — also fall through.
          failures.push({
            model: entry.model.id,
            reason: error instanceof Error ? error.message : String(error),
          });
          fallbackAttempts += 1;
          app.log.warn(
            { model: entry.model.id, err: error },
            "llm call failed, trying next model in fallback chain",
          );
        }
      }
      const durationMs = Date.now() - t0;
      if (!chat) {
        // Every model threw; the outer catch persists the controlled error message.
        throw new Error("all fallback models failed");
      }
      // The adapter may return ok=true (well-formed JSON) or ok=false with
      // a structured error + safe fallback. In both cases the fallback
      // contains a usable `answer` and (possibly empty) actions; we never
      // throw on a parse/schema problem — those are operational signal, not
      // request failures.
      const result = chat.ok ? chat.result : chat.fallback;
      if (!chat.ok) {
        app.log.warn(
          {
            kind: chat.error.kind,
            reason: chat.error.kind === "no_content" ? chat.error.message : chat.error.reason,
            model: usedModelId,
          },
          "pi-adapter returned a parse/structured error; using safe fallback",
        );
      }
      // Deterministic safety net: if the LLM forgot to put a relevant webhook
      // in `recommended_actions` (common with short user prompts), scan both
      // the user's input and the model's answer for any role-allowed webhook
      // id (or its first word) and synthesize an action so the user can still
      // execute it from the right panel.
      const recommendedActions = enrichActionsWithMentions(
        body.message,
        result.answer,
        result.recommended_actions,
        availableActions,
      );
      sendAnswerTokens(reply, result.answer);
      sendEvent(reply, { type: "actions", actions: recommendedActions });
      const assistantMetadata: Record<string, unknown> = {
        model: usedModelId,
        docs: result.internal_docs,
        actions: result.recommended_actions,
        usage: { ...(chat.usage ?? {}), durationMs },
      };
      if (usedModelId !== selectedModel.id || fallbackAttempts > 0) {
        assistantMetadata.requested_model = selectedModel.id;
        assistantMetadata.fallback_attempts = fallbackAttempts;
        assistantMetadata.fallback_chain = chain;
        assistantMetadata.fallback_failures = failures;
      }
      if (!chat.ok) {
        assistantMetadata.error_kind = chat.error.kind;
        assistantMetadata.error_reason = chat.error.kind === "no_content"
          ? chat.error.message
          : chat.error.reason;
        assistantMetadata.fallback = true;
      }
      messages.create({
        sessionId: body.sessionId,
        userId: user.id,
        role: "assistant",
        content: result.answer,
        metadata: assistantMetadata,
      });
      sessions.touch(user.id, body.sessionId);
      sendEvent(reply, { type: "done" });
      reply.raw.end();
    } catch (error) {
      const message = "I could not complete the model response. The user message has been saved.";
      app.log.error(error);
      // Persisting the controlled message is best effort: if it fails too, log it and still
      // finish the stream so the client is not left waiting on an open connection.
      try {
        messages.create({
          sessionId: body.sessionId,
          userId: user.id,
          role: "assistant",
          content: message,
          metadata: {
            model: selectedModel.id,
            error: error instanceof Error ? error.message : "unknown_error",
            // Keep the per-model failure trail when any model call failed.
            ...(failures.length > 0 ? { fallback_chain: chain, fallback_failures: failures } : {}),
          },
        });
        sessions.touch(user.id, body.sessionId);
      } catch (persistError) {
        app.log.error(persistError);
      }
      sendEvent(reply, { type: "token", token: message });
      sendEvent(reply, { type: "error", error: "llm_error" });
      sendEvent(reply, { type: "done" });
      reply.raw.end();
    }
    return reply;
  });
 };
--- a/apps/api/src/db/database.ts
+++ b/apps/api/src/db/database.ts
@@ -0,0 +1,27 @@
 import Database from "better-sqlite3";
 import { mkdirSync } from "node:fs";
 import { dirname, resolve } from "node:path";
 // Alias for the better-sqlite3 connection type; this is the handle type returned by openDatabase.
 export type AppDatabase = Database.Database;
 // Strips a leading "sqlite://" scheme so the remainder can be used as a
 // filesystem path. "sqlite:///abs/db" keeps its leading slash and becomes
 // "/abs/db"; values without the scheme are returned unchanged.
 function sqlitePathFromUrl(databaseUrl: string): string {
  const scheme = "sqlite://";
  return databaseUrl.startsWith(scheme) ? databaseUrl.slice(scheme.length) : databaseUrl;
 }
 // Opens the SQLite database behind `databaseUrl`, creating the parent
 // directory first when it does not exist yet.
 //
 // The URL may be a plain path or use the "sqlite://" scheme. WAL journaling
 // and foreign-key enforcement are switched on for the returned connection.
 //
 // A blank URL falls back to the built-in default. `??` does not cover
 // `DATABASE_URL=` (empty string) loaded from a .env file, and an empty path
 // would resolve to the working directory itself, which cannot be opened as
 // a database file.
 export function openDatabase(databaseUrl = process.env.DATABASE_URL ?? "sqlite://./data/pi-chat.db"): AppDatabase {
  const effectiveUrl = databaseUrl.trim().length > 0 ? databaseUrl : "sqlite://./data/pi-chat.db";
  const filename = resolve(sqlitePathFromUrl(effectiveUrl));
  mkdirSync(dirname(filename), { recursive: true });
  const db = new Database(filename);
  db.pragma("journal_mode = WAL");
  db.pragma("foreign_keys = ON");
  return db;
 }
--- a/apps/api/src/db/migrate.ts
+++ b/apps/api/src/db/migrate.ts
@@ -0,0 +1,62 @@
 import type { AppDatabase } from "./database.js";
 // Brings the database schema up to date. Safe to run on every boot: tables
 // and indexes are created only when missing, and columns introduced later
 // are added only to databases that do not have them yet.
 export function migrate(db: AppDatabase): void {
  const schemaSql = `
    CREATE TABLE IF NOT EXISTS chat_sessions (
      id TEXT PRIMARY KEY,
      user_id TEXT NOT NULL,
      title TEXT,
      system_prompt TEXT,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL
    );
    CREATE INDEX IF NOT EXISTS idx_chat_sessions_user_updated
      ON chat_sessions(user_id, updated_at DESC);
    CREATE TABLE IF NOT EXISTS chat_messages (
      id TEXT PRIMARY KEY,
      session_id TEXT NOT NULL,
      user_id TEXT NOT NULL,
      role TEXT NOT NULL,
      content TEXT NOT NULL,
      metadata TEXT,
      created_at TEXT NOT NULL,
      FOREIGN KEY (session_id) REFERENCES chat_sessions(id) ON DELETE CASCADE
    );
    CREATE INDEX IF NOT EXISTS idx_chat_messages_session_user_created
      ON chat_messages(session_id, user_id, created_at ASC);
    CREATE TABLE IF NOT EXISTS webhook_runs (
      id TEXT PRIMARY KEY,
      webhook_id TEXT NOT NULL,
      user_id TEXT NOT NULL,
      session_id TEXT NOT NULL,
      status TEXT NOT NULL,
      request_payload TEXT,
      response_status INTEGER,
      attempts INTEGER NOT NULL DEFAULT 1,
      created_at TEXT NOT NULL,
      FOREIGN KEY (session_id) REFERENCES chat_sessions(id) ON DELETE CASCADE
    );
    CREATE INDEX IF NOT EXISTS idx_webhook_runs_session_user_created
      ON webhook_runs(session_id, user_id, created_at DESC);
  `;
  db.exec(schemaSql);
  // Idempotent additive migrations for existing DBs: each entry names a
  // column and the statement that adds it when the table lacks it.
  const additiveColumns = [
    {
      table: "webhook_runs",
      column: "attempts",
      ddl: "ALTER TABLE webhook_runs ADD COLUMN attempts INTEGER NOT NULL DEFAULT 1",
    },
    {
      table: "chat_sessions",
      column: "system_prompt",
      ddl: "ALTER TABLE chat_sessions ADD COLUMN system_prompt TEXT",
    },
  ];
  for (const { table, column, ddl } of additiveColumns) {
    const existing = db
      .prepare(`PRAGMA table_info(${table})`)
      .all() as Array<{ name: string }>;
    const alreadyPresent = existing.some((entry) => entry.name === column);
    if (!alreadyPresent) {
      db.exec(ddl);
    }
  }
 }
--- a/apps/api/src/docs/repository.ts
+++ b/apps/api/src/docs/repository.ts
@@ -0,0 +1,255 @@
 import { readdirSync, readFileSync, statSync } from "node:fs";
 import { relative, resolve } from "node:path";
 import YAML from "yaml";
 import { envString } from "../env.js";
 import { loadRagConfig } from "../rag/config.js";
 import { getViaRag, isRagRemote, searchViaRag } from "../rag/client.js";
 // A knowledge-base document together with its full text.
 export type KnowledgeDoc = {
  // For files loaded from disk: the path relative to the knowledge root with
  // the ".md" extension dropped and path separators replaced by ":".
  id: string;
  // For files loaded from disk: frontmatter `title`, else the first heading,
  // else the relative path.
  title: string;
  // For files loaded from disk: path relative to the knowledge root.
  source: string;
  tags: string[];
  owner?: string;
  updated?: string;
  // Heading texts without their leading "#" markers.
  headings: string[];
  // For files loaded from disk: the Markdown body with the frontmatter removed.
  content: string;
 };
 // Search/list view of a document: everything except the full text, plus
 // ranking information.
 export type KnowledgeSearchResult = Omit<KnowledgeDoc, "content"> & {
  // Score for the query; 0 in plain listings built locally.
  relevance: number;
  // Short text snippet; empty in plain listings built locally.
  excerpt: string;
 };
 // Default knowledge directory: `knowledge/` two levels above the current
 // working directory.
 const defaultKnowledgeDir = () => {
  const workingDir = process.cwd();
  return resolve(workingDir, "../../knowledge");
 };
 // Turns a relative file path into a document id: a trailing ".md" (any
 // case) is dropped and every "/" or "\" becomes ":".
 const normalizePathId = (source: string) => {
  const withoutExtension = source.replace(/\.md$/i, "");
  return withoutExtension.replace(/[\\/]/g, ":");
 };
 // Recursively collects the absolute paths of all regular files ending in
 // ".md" under `dir`, in directory-listing order (sub-directories are
 // expanded in place).
 const walkMarkdownFiles = (dir: string): string[] => {
  const found: string[] = [];
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const fullPath = resolve(dir, entry.name);
    if (entry.isDirectory()) {
      found.push(...walkMarkdownFiles(fullPath));
    } else if (entry.isFile() && entry.name.endsWith(".md")) {
      found.push(fullPath);
    }
  }
  return found;
 };
 // Splits a Markdown file into YAML frontmatter and body. Frontmatter is
 // recognised only when the text starts with "---" and a later line starts
 // with "---"; otherwise the whole input is the body and metadata is empty.
 const parseFrontmatter = (raw: string) => {
  const fence = "---";
  const closingAt = raw.startsWith(fence) ? raw.indexOf(`\n${fence}`, fence.length) : -1;
  if (closingAt === -1) {
    return { metadata: {}, body: raw };
  }
  const yamlText = raw.slice(fence.length, closingAt).trim();
  // Skip the newline plus the closing fence itself.
  const body = raw.slice(closingAt + 1 + fence.length).trim();
  const parsed = YAML.parse(yamlText);
  return { metadata: parsed ?? {}, body };
 };
 // Returns the text of every line that starts with "#", with the leading
 // "#" run and surrounding whitespace removed; lines left empty are dropped.
 const extractHeadings = (body: string) => {
  const headings: string[] = [];
  for (const line of body.split("\n")) {
    if (!line.startsWith("#")) continue;
    const text = line.replace(/^#+\s*/, "").trim();
    if (text) headings.push(text);
  }
  return headings;
 };
 // Lower-cases `value` and returns its runs of ASCII letters/digits that
 // are at least two characters long, in order of appearance.
 const tokenize = (value: string) => {
  const runs = value.toLowerCase().match(/[a-z0-9]{2,}/gi);
  return runs ? [...runs] : [];
 };
 // Token-overlap score of a document for `query`. Each query token adds the
 // weight of every field whose lower-cased text contains it: title 5,
 // tags 4, headings 3, source 2, content 1. A query without usable tokens
 // scores 0.1.
 const scoreDoc = (doc: KnowledgeDoc, query: string) => {
  const tokens = tokenize(query);
  if (tokens.length === 0) {
    return 0.1;
  }
  const weightedFields: Array<[string, number]> = [
    [doc.title.toLowerCase(), 5],
    [doc.tags.join(" ").toLowerCase(), 4],
    [doc.headings.join(" ").toLowerCase(), 3],
    [doc.source.toLowerCase(), 2],
    [doc.content.toLowerCase(), 1],
  ];
  let total = 0;
  for (const token of tokens) {
    for (const [haystack, weight] of weightedFields) {
      if (haystack.includes(token)) total += weight;
    }
  }
  return total;
 };
 // Builds a short snippet from `content` with whitespace collapsed. When
 // the first query token occurs in the text the snippet spans 80 characters
 // before to 140 characters after the hit; otherwise it is the first 220
 // characters.
 const excerptFor = (content: string, query: string) => {
  const compact = content.replace(/\s+/g, " ").trim();
  const [firstToken] = tokenize(query);
  const hit = firstToken ? compact.toLowerCase().indexOf(firstToken) : -1;
  if (hit === -1) {
    return compact.slice(0, 220);
  }
  const start = Math.max(0, hit - 80);
  return compact.slice(start, hit + 140);
 };
 // Tag filter driven by the RAG config. A document passes when it carries
 // at least one of `includeTags` (skipped when that list is empty) and none
 // of `excludeTags`.
 const passesTagFilter = (
  doc: KnowledgeSearchResult,
  includeTags: string[],
  excludeTags: string[],
 ): boolean => {
  const hasAnyOf = (wanted: string[]) => doc.tags.some((tag) => wanted.includes(tag));
  const included = includeTags.length === 0 || hasAnyOf(includeTags);
  const excluded = excludeTags.length > 0 && hasAnyOf(excludeTags);
  return included && !excluded;
 };
 // Reads every Markdown file under `knowledgeDir` (default: KNOWLEDGE_DIR,
 // else the bundled knowledge directory) into a KnowledgeDoc. Returns an
 // empty list when the directory cannot be stat'ed.
 export const loadKnowledgeDocs = (knowledgeDir = envString(process.env.KNOWLEDGE_DIR, defaultKnowledgeDir())): KnowledgeDoc[] => {
  const root = resolve(knowledgeDir);
  try {
    statSync(root);
  } catch {
    return [];
  }
  const docs: KnowledgeDoc[] = [];
  for (const filePath of walkMarkdownFiles(root)) {
    const source = relative(root, filePath);
    const { metadata, body } = parseFrontmatter(readFileSync(filePath, "utf8"));
    const { title, tags, owner, updated } = metadata as Record<string, unknown>;
    const headings = extractHeadings(body);
    docs.push({
      id: normalizePathId(source),
      // Title precedence: frontmatter, then first heading, then the path.
      title: typeof title === "string" ? title : headings[0] ?? source,
      source,
      tags: Array.isArray(tags) ? tags.map(String) : [],
      owner: typeof owner === "string" ? owner : undefined,
      updated: typeof updated === "string" ? updated : undefined,
      headings,
      content: body,
    });
  }
  return docs;
 };
 // Builds the knowledge-base repository. The RAG config is read once here;
 // when it names a remote endpoint, `search` and `get` try it first and
 // fall back to the local Markdown files only if `fallbackToLocal` allows.
 export const createDocsRepository = () => {
  const rag = loadRagConfig();
  const useRemote = isRagRemote(rag);

  // Applies the configured include/exclude tag lists to one record.
  const allowedByTags = (doc: KnowledgeSearchResult): boolean =>
    passesTagFilter(doc, rag.includeTags, rag.excludeTags);

  // Local search: token-overlap scoring over the files in knowledge/.
  const searchLocal = (query: string, cap: number): KnowledgeSearchResult[] => {
    const ranked: Array<KnowledgeDoc & { relevance: number; excerpt: string }> = [];
    for (const doc of loadKnowledgeDocs()) {
      const { content: _content, ...meta } = doc;
      if (!allowedByTags({ ...meta, relevance: 0, excerpt: "" })) continue;
      const relevance = scoreDoc(doc, query);
      const relevantEnough = relevance >= rag.minRelevance && relevance > 0;
      if (!relevantEnough) continue;
      ranked.push({ ...doc, relevance, excerpt: excerptFor(doc.content, query) });
    }
    // Best score first; ties are ordered by title.
    ranked.sort((a, b) => b.relevance - a.relevance || a.title.localeCompare(b.title));
    return ranked.slice(0, cap).map(({ content: _content, ...doc }) => doc);
  };

  const search = async (query: string, limit?: number): Promise<KnowledgeSearchResult[]> => {
    const effectiveLimit = Math.max(1, limit ?? rag.topK);
    if (useRemote) {
      try {
        return await searchViaRag(rag, query, effectiveLimit);
      } catch (error) {
        if (!rag.fallbackToLocal) throw error;
      }
    }
    return searchLocal(query, effectiveLimit);
  };

  const get = async (id: string): Promise<KnowledgeDoc | undefined> => {
    if (useRemote) {
      try {
        return await getViaRag(rag, id);
      } catch (error) {
        if (!rag.fallbackToLocal) throw error;
      }
    }
    return loadKnowledgeDocs().find((doc) => doc.id === id);
  };

  const list = async (limit = 500): Promise<KnowledgeSearchResult[]> => {
    const everything = await getAllMetadataLocal();
    return everything.filter(allowedByTags).slice(0, limit);
  };

  const count = async (): Promise<number> => {
    const everything = await getAllMetadataLocal();
    return everything.filter(allowedByTags).length;
  };

  return { search, get, list, count };
 };
 // Returns metadata (no content) for the whole knowledge base. Despite the
 // name, a configured remote endpoint is queried first (empty query, up to
 // 1000 items); local files are used when no endpoint is set or when the
 // remote call fails and `fallbackToLocal` is enabled.
 const getAllMetadataLocal = async (): Promise<KnowledgeSearchResult[]> => {
  const rag = loadRagConfig();
  if (isRagRemote(rag)) {
    try {
      return await searchViaRag(rag, "", 1000);
    } catch (error) {
      if (!rag.fallbackToLocal) throw error;
    }
  }
  return loadKnowledgeDocs().map(({ id, title, source, tags, owner, updated, headings }) => {
    const entry: KnowledgeSearchResult = {
      id,
      title,
      source,
      tags,
      owner,
      updated,
      headings,
      relevance: 0,
      excerpt: "",
    };
    return entry;
  });
 };
--- a/apps/api/src/docs/routes.ts
+++ b/apps/api/src/docs/routes.ts
@@ -0,0 +1,41 @@
 import type { FastifyInstance } from "fastify";
 import { z } from "zod";
 import { createDocsRepository } from "./repository.js";
 // Query string of GET /api/docs/search: `q` is trimmed and defaults to "";
 // `limit` is coerced to an integer between 1 and 20 and defaults to 5.
 const searchQuery = z.object({
  q: z.string().trim().default(""),
  limit: z.coerce.number().int().min(1).max(20).default(5),
 });
 // Query string of GET /api/docs: `limit` is coerced to an integer between
 // 1 and 1000 and defaults to 500.
 const listQuery = z.object({
  limit: z.coerce.number().int().min(1).max(1000).default(500),
 });
 // Registers the knowledge-base routes on `app`:
 //   GET /api/docs         -> { items, total } (metadata listing)
 //   GET /api/docs/search  -> { items } (ranked search results)
 //   GET /api/docs/:id     -> the full document, or 404 { error: "doc_not_found" }
 export const registerDocsRoutes = async (app: FastifyInstance) => {
  const docs = createDocsRepository();
  app.get("/api/docs", async (request) => {
    const query = listQuery.parse(request.query);
    // Await explicitly so Fastify serializes a real array, not an
    // unresolved Promise which would be `{}` in the response body.
    const [items, total] = await Promise.all([docs.list(query.limit), docs.count()]);
    return { items, total };
  });
  app.get("/api/docs/search", async (request) => {
    const query = searchQuery.parse(request.query);
    const items = await docs.search(query.q, query.limit);
    return { items };
  });
  app.get("/api/docs/:id", async (request, reply) => {
    const params = z.object({ id: z.string().min(1) }).parse(request.params);
    // `docs.get` is async. Without the await, `doc` would be a Promise,
    // which is always truthy, so the 404 branch below could never run.
    const doc = await docs.get(params.id);
    if (!doc) {
      return reply.code(404).send({ error: "doc_not_found" });
    }
    return doc;
  });
 };
--- a/apps/api/src/env.ts
+++ b/apps/api/src/env.ts
@@ -0,0 +1,20 @@
 /**
 * Small env helpers.
 *
 * `process.env.X ?? fallback` does NOT fall back on empty strings — only on
 * undefined/null. That bites us when dotenv loads `KEY=` (blank value) from
 * `.env`. Use `envString` / `envNumber` to get safe fallbacks.
 */
 // Returns `value` unless it is undefined, null or the empty string, in
 // which case `fallback` is returned. Whitespace-only values are kept.
 export const envString = (value: string | undefined | null, fallback: string): string =>
  value == null || value.length === 0 ? fallback : value;
 // Parses `value` as a number. Returns `fallback` when the value is
 // missing, blank after trimming, or does not parse to a finite number.
 export const envNumber = (value: string | undefined | null, fallback: number): number => {
  const text = value?.trim() ?? "";
  if (text === "") {
    return fallback;
  }
  const parsed = Number(text);
  if (!Number.isFinite(parsed)) {
    return fallback;
  }
  return parsed;
 };
--- a/apps/api/src/mcp/config.ts
+++ b/apps/api/src/mcp/config.ts
@@ -0,0 +1,106 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import { parse } from "yaml";
 import { envString } from "../env.js";
 // Schema describing a tool's arguments. Only values whose `type` is
 // "object" are accepted as a parameter schema by this module.
 export type McpToolParameterSchema = {
  type: "object";
  required?: string[];
  properties?: Record<string, unknown>;
 };
 // One entry of the `mcp_tools` list in the MCP config file, after loading.
 export type McpToolDefinition = {
  id: string;
  // Defaults to the id when the config gives no name.
  name: string;
  description: string;
  // Null when the config gives no string value.
  // NOTE(review): presumably the id of an McpServerDefinition — confirm.
  server: string | null;
  parameters: McpToolParameterSchema;
  tags: string[];
  // True unless the config sets `enabled: false` explicitly.
  enabled: boolean;
 };
 // One entry of the `mcp_servers` list in the MCP config file, after loading.
 export type McpServerDefinition = {
  id: string;
  // Defaults to the id when the config gives no name.
  name: string;
  description: string;
  endpoint: string;
 };
 // Shape returned by the public tools API; currently field-for-field the
 // same as McpToolDefinition.
 export type PublicMcpToolDefinition = Omit<McpToolDefinition, "server"> & {
  server: string | null;
 };
 // Shape returned by the public servers API; currently identical to
 // McpServerDefinition.
 export type PublicMcpServerDefinition = McpServerDefinition;
 // Top-level layout of the MCP YAML file as read by the loaders here.
 type McpFile = {
  mcp_servers?: McpServerDefinition[];
  mcp_tools?: McpToolDefinition[];
 };
 // Location of the MCP config: MCP_CONFIG_PATH when set and non-empty,
 // otherwise config/mcp.yml two levels above the working directory.
 const defaultPath = (): string => {
  const bundled = resolve(process.cwd(), "../../config/mcp.yml");
  return envString(process.env.MCP_CONFIG_PATH, bundled);
 };
 // Type guard: true for any non-null object whose `type` property is
 // exactly "object". Other properties are not inspected.
 const isToolParameterSchema = (value: unknown): value is McpToolParameterSchema =>
  typeof value === "object" &&
  value !== null &&
  (value as { type?: unknown }).type === "object";
 // Loads and normalises the `mcp_tools` list from the MCP config file.
 // A missing file or a file without that list yields an empty array; any
 // other read error is rethrown. Entries without a string id, or whose id
 // is blank after trimming, are skipped.
 export const loadMcpTools = (
  configPath: string = defaultPath(),
 ): McpToolDefinition[] => {
  let raw: string;
  try {
    raw = readFileSync(configPath, "utf8");
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }
  const parsed = parse(raw) as McpFile | null;
  const entries = parsed && Array.isArray(parsed.mcp_tools) ? parsed.mcp_tools : [];
  const tools: McpToolDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object" || typeof entry.id !== "string") continue;
    const id = String(entry.id).trim();
    if (id.length === 0) continue;
    tools.push({
      id,
      name: String(entry.name ?? entry.id).trim(),
      description: String(entry.description ?? "").trim(),
      server: typeof entry.server === "string" ? entry.server : null,
      // An unusable schema is replaced by an empty object schema.
      parameters: isToolParameterSchema(entry.parameters)
        ? entry.parameters
        : ({ type: "object", properties: {}, required: [] } satisfies McpToolParameterSchema),
      tags: Array.isArray(entry.tags) ? entry.tags.map(String) : [],
      enabled: entry.enabled !== false,
    });
  }
  return tools;
 };
 // Loads and normalises the `mcp_servers` list from the MCP config file.
 // A missing file or a file without that list yields an empty array; any
 // other read error is rethrown. Entries without a string id, or whose id
 // is blank after trimming, are skipped.
 export const loadMcpServers = (
  configPath: string = defaultPath(),
 ): McpServerDefinition[] => {
  let raw: string;
  try {
    raw = readFileSync(configPath, "utf8");
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }
  const parsed = parse(raw) as McpFile | null;
  const entries = parsed && Array.isArray(parsed.mcp_servers) ? parsed.mcp_servers : [];
  const servers: McpServerDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object" || typeof entry.id !== "string") continue;
    const id = String(entry.id).trim();
    if (id.length === 0) continue;
    servers.push({
      id,
      name: String(entry.name ?? entry.id).trim(),
      description: String(entry.description ?? "").trim(),
      endpoint: String(entry.endpoint ?? "").trim(),
    });
  }
  return servers;
 };
 // Keeps only the tools whose `enabled` flag is set. With no argument the
 // tools are loaded from the config file.
 export const enabledMcpTools = (tools: McpToolDefinition[] = loadMcpTools()): McpToolDefinition[] => {
  return tools.filter(({ enabled }) => enabled);
 };
 // Public view of a tool: a shallow copy carrying every field.
 export const toPublicMcpTool = (tool: McpToolDefinition): PublicMcpToolDefinition => {
  const publicTool = { ...tool };
  return publicTool;
 };
 // Public view of a server: a shallow copy carrying every field.
 export const toPublicMcpServer = (server: McpServerDefinition): PublicMcpServerDefinition => {
  const publicServer = { ...server };
  return publicServer;
 };
--- a/apps/api/src/mcp/routes.ts
+++ b/apps/api/src/mcp/routes.ts
@@ -0,0 +1,19 @@
 import type { FastifyInstance } from "fastify";
 import {
  enabledMcpTools,
  loadMcpServers,
  toPublicMcpServer,
  toPublicMcpTool,
 } from "./config.js";
 // Registers the read-only MCP catalogue routes:
 //   GET /api/mcp/tools    -> { items } with the enabled tools
 //   GET /api/mcp/servers  -> { items } with all configured servers
 // The config is re-read on every request.
 export const registerMcpRoutes = async (app: FastifyInstance) => {
  app.get("/api/mcp/tools", async () => ({
    items: enabledMcpTools().map(toPublicMcpTool),
  }));
  app.get("/api/mcp/servers", async () => ({
    items: loadMcpServers().map(toPublicMcpServer),
  }));
 };
--- a/apps/api/src/metrics.ts
+++ b/apps/api/src/metrics.ts
@@ -0,0 +1,178 @@
 // Tiny in-process metrics. Thread-safe enough for a 5-user MVP — counters
 // and sums are only ever incremented under a single-threaded Node event
 // loop, no atomic ops required.
 // One observed HTTP request, as reported to observeHttp.
 export type RouteMetric = {
  route: string;
  method: string;
  status: number;
  durationMs: number;
  timestamp: number;
 };
 // Running totals for one "METHOD route" key.
 type Aggregate = {
  count: number;
  // Request counts keyed by status class (200, 300, 400, ...).
  statusBuckets: Map<number, number>;
  sumMs: number;
  maxMs: number;
  p95Slots: number[]; // sliding window of the most recent durations, used for a coarse p95
 };
 // Number of recent durations kept per route for the p95 estimate.
 const RESERVOIR_SIZE = 200;
 const createAggregate = (): Aggregate => ({
  count: 0,
  statusBuckets: new Map(),
  sumMs: 0,
  maxMs: 0,
  p95Slots: [],
 });
 const metricsState = {
  startedAt: Date.now(),
  aggregates: new Map<string, Aggregate>(),
  // Last N events for the /metrics JSON inspector. Bounded to avoid leaks.
  recent: [] as RouteMetric[],
  recentLimit: 50,
  errorCounts: new Map<string, number>(),
 };
 const keyFor = (route: string, method: string) => `${method.toUpperCase()} ${route}`;
 // Inverse of keyFor: the method is everything before the first space, the
 // route is the remainder (which may itself contain spaces).
 const splitKey = (key: string) => {
  const [method, ...rest] = key.split(" ");
  return { method, route: rest.join(" ") };
 };
 // Escapes a label value for the Prometheus text format, where backslash,
 // double quote and line feed must be written as \\, \" and \n.
 const escapeLabelValue = (value: string): string =>
  value.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n");
 // Records one finished request: updates the per-route totals, the status
 // class counters, the p95 window, the 5xx counter and the recent-events list.
 export const observeHttp = (metric: RouteMetric) => {
  const key = keyFor(metric.route, metric.method);
  let agg = metricsState.aggregates.get(key);
  if (!agg) {
    agg = createAggregate();
    metricsState.aggregates.set(key, agg);
  }
  agg.count += 1;
  agg.sumMs += metric.durationMs;
  if (metric.durationMs > agg.maxMs) agg.maxMs = metric.durationMs;
  const statusBucket = Math.floor(metric.status / 100) * 100;
  agg.statusBuckets.set(statusBucket, (agg.statusBuckets.get(statusBucket) ?? 0) + 1);
  // Ring buffer over the last RESERVOIR_SIZE durations. While the window is
  // still filling, the index equals the current length, so this appends;
  // afterwards it overwrites the oldest sample. Keeping only the smallest
  // (or only the largest) samples instead would bias the percentile.
  agg.p95Slots[(agg.count - 1) % RESERVOIR_SIZE] = metric.durationMs;
  if (metric.status >= 500) {
    metricsState.errorCounts.set(key, (metricsState.errorCounts.get(key) ?? 0) + 1);
  }
  metricsState.recent.push(metric);
  if (metricsState.recent.length > metricsState.recentLimit) {
    metricsState.recent.splice(0, metricsState.recent.length - metricsState.recentLimit);
  }
 };
 // Value at percentile `p` (0-100) of an ascending-sorted array; 0 when the
 // array is empty.
 const percentile = (sorted: number[], p: number): number => {
  if (sorted.length === 0) return 0;
  const idx = Math.min(sorted.length - 1, Math.floor((p / 100) * sorted.length));
  return sorted[idx];
 };
 // JSON view of the current metrics: process uptime, request/5xx totals,
 // per-route statistics (busiest first) and the recent events, newest first.
 export const snapshotMetrics = () => {
  const routes: Array<{
    route: string;
    method: string;
    count: number;
    avg_ms: number;
    p95_ms: number;
    max_ms: number;
    status_buckets: Record<string, number>;
  }> = [];
  for (const [key, agg] of metricsState.aggregates.entries()) {
    const { method, route } = splitKey(key);
    const sorted = [...agg.p95Slots].sort((a, b) => a - b);
    routes.push({
      route,
      method,
      count: agg.count,
      avg_ms: agg.count === 0 ? 0 : Math.round(agg.sumMs / agg.count),
      p95_ms: Math.round(percentile(sorted, 95)),
      max_ms: agg.maxMs,
      status_buckets: Object.fromEntries(
        [...agg.statusBuckets.entries()].map(([k, v]) => [`${k}_${k + 99}`, v]),
      ),
    });
  }
  return {
    started_at: new Date(metricsState.startedAt).toISOString(),
    uptime_seconds: Math.round((Date.now() - metricsState.startedAt) / 1000),
    totals: {
      requests: [...metricsState.aggregates.values()].reduce((sum, a) => sum + a.count, 0),
      errors_5xx: [...metricsState.errorCounts.values()].reduce((sum, n) => sum + n, 0),
    },
    routes: routes.sort((a, b) => b.count - a.count),
    recent: [...metricsState.recent].reverse(),
  };
 };
 // Tiny Prometheus-style exposition. Stable enough for a scraper. Label
 // values are escaped so a route containing quotes, backslashes or line
 // feeds cannot break the output.
 export const renderPrometheusText = (): string => {
  const lines: string[] = [];
  lines.push("# HELP sic_uptime_seconds Seconds since the API process started");
  lines.push("# TYPE sic_uptime_seconds gauge");
  lines.push(`sic_uptime_seconds ${Math.round((Date.now() - metricsState.startedAt) / 1000)}`);
  lines.push("");
  lines.push("# HELP sic_http_requests_total Total HTTP requests, labelled by route, method, status");
  lines.push("# TYPE sic_http_requests_total counter");
  for (const [key, agg] of metricsState.aggregates.entries()) {
    const { method, route } = splitKey(key);
    const labels = `route="${escapeLabelValue(route)}",method="${escapeLabelValue(method)}"`;
    for (const [bucket, count] of agg.statusBuckets.entries()) {
      const statusClass = `${bucket}_${bucket + 99}`;
      lines.push(`sic_http_requests_total{${labels},status_class="${statusClass}"} ${count}`);
    }
  }
  lines.push("");
  lines.push("# HELP sic_http_request_duration_ms Request duration in ms");
  lines.push("# TYPE sic_http_request_duration_ms summary");
  for (const [key, agg] of metricsState.aggregates.entries()) {
    const { method, route } = splitKey(key);
    const labels = `route="${escapeLabelValue(route)}",method="${escapeLabelValue(method)}"`;
    const sorted = [...agg.p95Slots].sort((a, b) => a - b);
    const avg = agg.count === 0 ? 0 : Math.round(agg.sumMs / agg.count);
    lines.push(`sic_http_request_duration_ms{${labels},quantile="0.95"} ${percentile(sorted, 95)}`);
    lines.push(`sic_http_request_duration_ms_sum{${labels}} ${agg.sumMs}`);
    lines.push(`sic_http_request_duration_ms_count{${labels}} ${agg.count}`);
    lines.push(`sic_http_request_duration_ms_max{${labels}} ${agg.maxMs}`);
    lines.push(`sic_http_request_duration_ms_avg{${labels}} ${avg}`);
  }
  return `${lines.join("\n")}\n`;
 };
 // Clears all recorded metrics and restarts the uptime clock. Test helper.
 export const __resetMetricsForTests = () => {
  metricsState.startedAt = Date.now();
  metricsState.aggregates.clear();
  metricsState.recent.length = 0;
  metricsState.errorCounts.clear();
 };
--- a/apps/api/src/models/config.ts
+++ b/apps/api/src/models/config.ts
@@ -0,0 +1,105 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import { parse } from "yaml";
 import { envString } from "../env.js";
 // One model entry from the models config file.
 export type ModelDefinition = {
  id: string;
  label: string;
  provider: "openai-compatible";
  // `${NAME}` placeholders are replaced with environment values at load time.
  base_url?: string;
  // Name of the environment variable holding this model's API key, if any.
  api_key_env?: string;
  model: string;
  max_tokens?: number;
  // Ordered list of model ids to try if this one fails (5xx, 429, network
  // error, timeout). Each id must resolve to a known model; unknown ids are
  // dropped at load time. Use ["mr-auto"] for a single fallback. The
  // fallback chain for the chain itself is resolved at request time by
  // `resolveFallbackChain`.
  fallback?: string[];
 };
 // Fields of a model that toPublicModel returns; base_url, api_key_env,
 // model and fallback are left out.
 export type PublicModelDefinition = Pick<ModelDefinition, "id" | "label" | "provider" | "max_tokens">;
 // Picks the API key to send for `model`, trying in this order:
 //   1. The environment variable named by model.api_key_env, trimmed, when
 //      it holds a non-blank value (lets providers use different keys).
 //   2. LLM_API_KEY, then MINIMAX_API_KEY, shared by all models.
 //   3. The literal "dummy", so OpenAI-compatible servers that need no auth
 //      still receive a key.
 export const resolveModelApiKey = (model: ModelDefinition): string => {
  const perModelKey = (model.api_key_env ? process.env[model.api_key_env] : undefined)?.trim() ?? "";
  if (perModelKey.length > 0) {
    return perModelKey;
  }
  const sharedFallback = envString(process.env.MINIMAX_API_KEY, "dummy");
  return envString(process.env.LLM_API_KEY, sharedFallback);
 };
 // Replaces every `${NAME}` placeholder (upper-case letters, digits and
 // underscores) with the value of that environment variable, or with ""
 // when it is unset. A missing input yields undefined.
 const expandEnv = (value: string | undefined) => {
  if (value == null) {
    return undefined;
  }
  return value.replace(/\$\{([A-Z0-9_]+)\}/g, (_placeholder, name: string) => process.env[name] ?? "");
 };
 // Reads the models config (MODELS_CONFIG_PATH, else config/models.yml two
 // levels above the working directory). Each model's base_url has its env
 // placeholders expanded, and its fallback list is trimmed down to ids that
 // are non-empty, known, and different from the model's own id.
 export const loadModelDefinitions = (): ModelDefinition[] => {
  const configPath = envString(process.env.MODELS_CONFIG_PATH, resolve(process.cwd(), "../../config/models.yml"));
  const parsed = parse(readFileSync(configPath, "utf8")) as { models?: ModelDefinition[] } | null;
  const models = parsed?.models ?? [];
  const known = new Set(models.map((model) => String(model.id ?? "").trim()));
  return models.map((model) => {
    const fallback: string[] = [];
    for (const candidate of Array.isArray(model.fallback) ? model.fallback : []) {
      const id = String(candidate).trim();
      if (id.length > 0 && known.has(id) && id !== model.id) {
        fallback.push(id);
      }
    }
    return { ...model, base_url: expandEnv(model.base_url), fallback };
  });
 };
 // Id of the model used by default: DEFAULT_MODEL when set and non-empty,
 // otherwise "fast".
 export const getDefaultModelId = () => {
  const configured = process.env.DEFAULT_MODEL;
  return envString(configured, "fast");
 };
 // Looks up a model by exact id in a freshly loaded config; undefined when
 // no model has that id.
 export const findModelDefinition = (modelId: string) => {
  for (const model of loadModelDefinitions()) {
    if (model.id === modelId) return model;
  }
  return undefined;
 };
 /**
 * Resolve the ordered fallback chain starting at `modelId`.
 *
 * The starting model is always first. Its fallback ids follow in the order
 * they are listed, then the fallbacks of those models, and so on
 * (breadth-first). Every id appears at most once, which also ends the walk
 * when the config contains a cycle. If the model is unknown the chain is
 * just `[modelId]` (caller will surface model_not_found). An empty
 * `modelId` yields an empty chain.
 *
 * LLM_FALLBACK_CHAIN (comma-separated) replaces the YAML fallback list of
 * the selected model only; all of its entries are used, in order. Empty or
 * unset means "use the YAML chain".
 *
 * Earlier versions followed only the first entry of each list, silently
 * ignoring the rest. Chains built from single-entry lists are unchanged.
 */
 export const resolveFallbackChain = (modelId: string): string[] => {
  const byId = new Map(loadModelDefinitions().map((model) => [model.id, model] as const));
  const override = envString(process.env.LLM_FALLBACK_CHAIN, "")
    .split(",")
    .map((id) => id.trim())
    .filter((id) => id.length > 0);
  const chain: string[] = [];
  const seen = new Set<string>();
  // Work list that grows while it is being walked.
  const pending: string[] = [modelId];
  for (let i = 0; i < pending.length; i++) {
    const current = pending[i];
    if (!current || seen.has(current)) continue;
    seen.add(current);
    chain.push(current);
    const useOverride = current === modelId && override.length > 0;
    const next = useOverride ? override : byId.get(current)?.fallback ?? [];
    pending.push(...next);
  }
  return chain;
 };
 // Public view of a model: only id, label, provider and max_tokens.
 export const toPublicModel = (model: ModelDefinition): PublicModelDefinition => {
  const { id, label, provider, max_tokens } = model;
  return { id, label, provider, max_tokens };
 };
--- a/apps/api/src/models/routes.ts
+++ b/apps/api/src/models/routes.ts
@@ -0,0 +1,14 @@
 import type { FastifyInstance } from "fastify";
 import { getDefaultModelId, loadModelDefinitions, toPublicModel } from "./config.js";
 // Registers GET /api/models, which returns the public model list plus the
 // id to preselect: the configured default when it exists in the list,
 // otherwise the first model's id (undefined when the list is empty).
 export const registerModelRoutes = async (app: FastifyInstance) => {
  app.get("/api/models", async () => {
    const preferredId = getDefaultModelId();
    const items = loadModelDefinitions().map(toPublicModel);
    const preferredExists = items.some((model) => model.id === preferredId);
    const default_model = preferredExists ? preferredId : items[0]?.id;
    return { default_model, items };
  });
 };
--- a/apps/api/src/n8n/config.ts
+++ b/apps/api/src/n8n/config.ts
@@ -0,0 +1,58 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import { parse } from "yaml";
 import { envString } from "../env.js";
 // One entry of the `n8n_workflows` list in the n8n config file, after loading.
 export type N8nWorkflowDefinition = {
  id: string;
  // Defaults to the id when the config gives no label.
  label: string;
  description: string;
  // Environment placeholders in the configured value are expanded at load time.
  url: string;
  // Roles a user must hold (all of them) to use the workflow; empty means
  // no restriction.
  required_roles: string[];
  tags: string[];
 };
 // Shape returned by the public API; currently identical to N8nWorkflowDefinition.
 export type PublicN8nWorkflowDefinition = N8nWorkflowDefinition;
 // Top-level layout of the n8n YAML file as read by loadN8nWorkflows.
 type N8nFile = { n8n_workflows?: N8nWorkflowDefinition[] };
 // Replaces `${NAME}` and `${NAME:?message}` placeholders with the value of
 // the named environment variable, or with "" when it is unset. The
 // `:?message` part is accepted but otherwise ignored.
 const expandEnv = (value: string): string => {
  const placeholder = /\$\{([A-Z0-9_]+)(?::\?[^}]+)?\}/g;
  return value.replace(placeholder, (_whole, variable: string) => process.env[variable] ?? "");
 };
 // Location of the n8n config: N8N_CONFIG_PATH when set and non-empty,
 // otherwise config/n8n-workflows.yml two levels above the working directory.
 const defaultPath = (): string => {
  const bundled = resolve(process.cwd(), "../../config/n8n-workflows.yml");
  return envString(process.env.N8N_CONFIG_PATH, bundled);
 };
 // Loads and normalises the `n8n_workflows` list from the n8n config file.
 // A missing file or a file without that list yields an empty array; any
 // other read error is rethrown. Entries without a string id, or whose id
 // is blank after trimming, are skipped.
 export const loadN8nWorkflows = (
  configPath: string = defaultPath(),
 ): N8nWorkflowDefinition[] => {
  let raw: string;
  try {
    raw = readFileSync(configPath, "utf8");
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }
  const parsed = parse(raw) as N8nFile | null;
  const entries = parsed && Array.isArray(parsed.n8n_workflows) ? parsed.n8n_workflows : [];
  const workflows: N8nWorkflowDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object" || typeof entry.id !== "string") continue;
    const id = String(entry.id).trim();
    if (id.length === 0) continue;
    workflows.push({
      id,
      label: String(entry.label ?? entry.id).trim(),
      description: String(entry.description ?? "").trim(),
      url: expandEnv(String(entry.url ?? "").trim()),
      required_roles: Array.isArray(entry.required_roles)
        ? entry.required_roles.map(String)
        : [],
      tags: Array.isArray(entry.tags) ? entry.tags.map(String) : [],
    });
  }
  return workflows;
 };
 // True when the user holds every role the workflow requires. A workflow
 // with no (or a non-array) required_roles is open to everyone.
 export const canUseN8nWorkflow = (userRoles: string[], wf: N8nWorkflowDefinition): boolean => {
  const required = Array.isArray(wf.required_roles) ? wf.required_roles : [];
  for (const role of required) {
    if (!userRoles.includes(role)) return false;
  }
  return true;
 };
 // Public view of a workflow: a shallow copy carrying every field.
 export const toPublicN8nWorkflow = (wf: N8nWorkflowDefinition): PublicN8nWorkflowDefinition => {
  const publicWorkflow = { ...wf };
  return publicWorkflow;
 };
--- a/apps/api/src/n8n/routes.ts
+++ b/apps/api/src/n8n/routes.ts
@@ -0,0 +1,13 @@
 import type { FastifyInstance } from "fastify";
 import { canUseN8nWorkflow, loadN8nWorkflows, toPublicN8nWorkflow } from "./config.js";
 import { getAuthUser } from "../auth/index.js";
 // Registers GET /api/n8n-workflows, which returns the workflows the
 // authenticated user's roles allow. The config is re-read on every request.
 export const registerN8nRoutes = async (app: FastifyInstance) => {
  app.get("/api/n8n-workflows", async (request) => {
    const user = await getAuthUser(request);
    const permitted = loadN8nWorkflows().filter((wf) => canUseN8nWorkflow(user.roles, wf));
    return { items: permitted.map(toPublicN8nWorkflow) };
  });
 };
--- a/apps/api/src/rag/client.ts
+++ b/apps/api/src/rag/client.ts
@@ -0,0 +1,105 @@
 import type { KnowledgeDoc, KnowledgeSearchResult } from "../docs/repository.js";
 import type { RagConfig } from "./config.js";
 // JSON body expected from the remote `/search` endpoint. Only `id` is
 // required per item; the client fills defaults for everything else.
 export type RagSearchResponse = {
  items: Array<{
    id: string;
    title?: string;
    source?: string;
    tags?: string[];
    relevance?: number;
    excerpt?: string;
    content?: string;
  }>;
 };
 // JSON body expected from the remote `/docs/:id` endpoint: any subset of a
 // KnowledgeDoc as long as `id` is present.
 export type RagGetResponse = Partial<KnowledgeDoc> & { id: string };
 // Removes a single trailing "/" from `url`, if there is one, so a path can
 // be appended with exactly one separator.
 const ensureTrailing = (url: string) => (url.endsWith("/") ? url.slice(0, -1) : url);
 // True when the config names a remote endpoint, i.e. `endpoint` is not
 // blank after trimming.
 export const isRagRemote = (config: RagConfig): boolean => {
  return config.endpoint.trim() !== "";
 };
 // Request headers for the remote service: JSON content negotiation, plus
 // a Bearer authorization header when an auth token is configured.
 const buildHeaders = (config: RagConfig): Record<string, string> => ({
  "content-type": "application/json",
  accept: "application/json",
  ...(config.authToken ? { authorization: `Bearer ${config.authToken}` } : {}),
 });
 // Runs a search against the remote service by POSTing the query, the
 // limit and the configured relevance/tag filters to `<endpoint>/search`.
 // The request is aborted after `config.timeoutMs`. Throws
 // `rag_search_failed:<status>` on a non-2xx answer; a body without an
 // `items` array yields an empty result. Remote results carry no headings.
 export const searchViaRag = async (
  config: RagConfig,
  query: string,
  limit: number,
 ): Promise<KnowledgeSearchResult[]> => {
  const url = `${ensureTrailing(config.endpoint)}/search`;
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), config.timeoutMs);
  try {
    const payload = {
      query,
      limit,
      min_relevance: config.minRelevance,
      include_tags: config.includeTags,
      exclude_tags: config.excludeTags,
    };
    const response = await fetch(url, {
      method: "POST",
      headers: buildHeaders(config),
      body: JSON.stringify(payload),
      signal: controller.signal,
    });
    if (!response.ok) {
      throw new Error(`rag_search_failed:${response.status}`);
    }
    const data = (await response.json()) as RagSearchResponse;
    const items = data && Array.isArray(data.items) ? data.items : [];
    return items.map(({ id, title, source, tags, relevance, excerpt }) => ({
      id: String(id),
      title: String(title ?? id),
      source: String(source ?? ""),
      tags: Array.isArray(tags) ? tags.map(String) : [],
      relevance: Number(relevance ?? 0),
      excerpt: String(excerpt ?? ""),
      headings: [],
    }));
  } finally {
    // Always release the timer, whether the request succeeded or not.
    clearTimeout(abortTimer);
  }
 };
 // Fetches one document from the remote service via GET
 // `<endpoint>/docs/<id>` (id URL-encoded). The request is aborted after
 // `config.timeoutMs`. Returns undefined on 404 or when the body has no
 // id; throws `rag_get_failed:<status>` on any other non-2xx answer.
 export const getViaRag = async (
  config: RagConfig,
  id: string,
 ): Promise<KnowledgeDoc | undefined> => {
  const url = `${ensureTrailing(config.endpoint)}/docs/${encodeURIComponent(id)}`;
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), config.timeoutMs);
  try {
    const response = await fetch(url, {
      method: "GET",
      headers: buildHeaders(config),
      signal: controller.signal,
    });
    if (response.status === 404) {
      return undefined;
    }
    if (!response.ok) {
      throw new Error(`rag_get_failed:${response.status}`);
    }
    const data = (await response.json()) as RagGetResponse;
    if (!data || !data.id) {
      return undefined;
    }
    const { title, source, tags, owner, updated, headings, content } = data;
    return {
      id: String(data.id),
      title: String(title ?? data.id),
      source: String(source ?? ""),
      tags: Array.isArray(tags) ? tags.map(String) : [],
      owner: typeof owner === "string" ? owner : undefined,
      updated: typeof updated === "string" ? updated : undefined,
      headings: Array.isArray(headings) ? headings.map(String) : [],
      content: String(content ?? ""),
    };
  } finally {
    // Always release the timer, whether the request succeeded or not.
    clearTimeout(abortTimer);
  }
 };
--- a/apps/api/src/rag/config.ts
+++ b/apps/api/src/rag/config.ts
@@ -0,0 +1,80 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import { parse } from "yaml";
 import { envString } from "../env.js";
 /** Accepted values for `chunkStrategy`; anything else is normalized to "heading". */
 export type RagChunkStrategy = "heading" | "paragraph" | "fixed";
 /** Fully-normalized RAG settings, as produced by `normalize` / `defaultConfig`. */
 export type RagConfig = {
  endpoint: string;
  authToken: string;
  timeoutMs: number;
  fallbackToLocal: boolean;
  chunkStrategy: RagChunkStrategy;
  chunkSizeChars: number;
  topK: number;
  minRelevance: number;
  includeTags: string[];
  excludeTags: string[];
 };
 export type PublicRagConfig = Omit<RagConfig, "authToken"> & {
  // Never expose the auth token over the public API; show only whether
  // one is configured.
  hasAuthToken: boolean;
 };
 /** Built-in settings used when the config file or its `rag` section is missing, and as per-field fallbacks in `normalize`. */
 const defaultConfig = (): RagConfig => ({
  endpoint: "",
  authToken: "",
  timeoutMs: 10_000,
  fallbackToLocal: true,
  chunkStrategy: "heading",
  chunkSizeChars: 1500,
  topK: 5,
  minRelevance: 0,
  includeTags: [],
  excludeTags: [],
 });
 /**
 * Replaces `${NAME}` and `${NAME:}` placeholders (NAME = A-Z, 0-9, `_`) with
 * the value of the matching environment variable, or "" when it is unset.
 */
 const expandEnv = (value: string): string =>
  value.replace(/\$\{([A-Z0-9_]+):?\}/g, (_m, name: string) => process.env[name] ?? "");
 /**
 * Coerces `value` to a number, returning `fallback` when it is missing or
 * does not convert to a finite number. Without this guard a value such as
 * "abc" becomes NaN, and `Math.max(100, NaN)` is NaN as well.
 */
 const finiteOr = (value: unknown, fallback: number): number => {
  const parsed = Number(value ?? fallback);
  return Number.isFinite(parsed) ? parsed : fallback;
 };
 /**
 * Turns the raw `rag` section of the YAML file into a complete `RagConfig`.
 *
 * - Non-object input yields the defaults.
 * - `endpoint` / `authToken` are trimmed and env placeholders are expanded.
 * - Numeric fields are clamped (timeoutMs >= 100, chunkSizeChars >= 200,
 *   topK >= 1, minRelevance within [0, 1]); values that are not finite
 *   numbers fall back to the defaults instead of propagating NaN.
 * - `fallbackToLocal` is true unless explicitly `false`.
 * - Tag lists that are not arrays become empty; entries are stringified.
 */
 const normalize = (raw: unknown): RagConfig => {
  const defaults = defaultConfig();
  if (!raw || typeof raw !== "object") return defaults;
  const r = raw as Partial<RagConfig>;
  const chunkStrategy: RagChunkStrategy =
    r.chunkStrategy === "paragraph" || r.chunkStrategy === "fixed" ? r.chunkStrategy : "heading";
  return {
    endpoint: expandEnv(String(r.endpoint ?? "").trim()),
    authToken: expandEnv(String(r.authToken ?? "").trim()),
    timeoutMs: Math.max(100, finiteOr(r.timeoutMs, defaults.timeoutMs)),
    fallbackToLocal: r.fallbackToLocal !== false,
    chunkStrategy,
    chunkSizeChars: Math.max(200, finiteOr(r.chunkSizeChars, defaults.chunkSizeChars)),
    topK: Math.max(1, finiteOr(r.topK, defaults.topK)),
    minRelevance: Math.max(0, Math.min(1, finiteOr(r.minRelevance, defaults.minRelevance))),
    includeTags: Array.isArray(r.includeTags) ? r.includeTags.map(String) : [],
    excludeTags: Array.isArray(r.excludeTags) ? r.excludeTags.map(String) : [],
  };
 };
 /**
 * Location of the RAG config file: `RAG_CONFIG_PATH` is handed to
 * `envString`, with `../../config/rag.yml` (relative to the cwd) as the
 * fallback argument.
 */
 const defaultPath = (): string => {
  const fallback = resolve(process.cwd(), "../../config/rag.yml");
  return envString(process.env.RAG_CONFIG_PATH, fallback);
 };
 /**
 * Reads the YAML config file and returns the normalized `rag` section.
 *
 * @param configPath File to read; defaults to `defaultPath()`.
 * @returns The normalized config, or the built-in defaults when the file does not exist (ENOENT).
 * @throws Any other read or parse error is rethrown unchanged.
 */
 export const loadRagConfig = (configPath: string = defaultPath()): RagConfig => {
  try {
    const document = parse(readFileSync(configPath, "utf8")) as { rag?: unknown } | null;
    return normalize(document?.rag);
  } catch (error) {
    // Only a missing file is tolerated; a malformed file must surface.
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
    return defaultConfig();
  }
 };
 /**
 * Produces the client-safe view of the config: a copy without `authToken`,
 * plus `hasAuthToken` telling whether a non-empty token is set.
 * The input object is not modified.
 */
 export const toPublicRagConfig = (config: RagConfig): PublicRagConfig => {
  const hasAuthToken = config.authToken ? true : false;
  const { authToken: _omitted, ...visible } = config;
  return Object.assign(visible, { hasAuthToken });
 };
--- a/apps/api/src/rag/routes.ts
+++ b/apps/api/src/rag/routes.ts
@@ -0,0 +1,6 @@
 import type { FastifyInstance } from "fastify";
 import { loadRagConfig, toPublicRagConfig } from "./config.js";
 /**
 * Registers `GET /api/rag/config`. The config is loaded again on every
 * request and returned through `toPublicRagConfig`, i.e. without the token.
 */
 export const registerRagRoutes = async (app: FastifyInstance) => {
  app.get("/api/rag/config", async () => {
    const current = loadRagConfig();
    return toPublicRagConfig(current);
  });
 };
--- a/apps/api/src/rate-limit.ts
+++ b/apps/api/src/rate-limit.ts
@@ -0,0 +1,88 @@
 // In-memory token bucket rate limiter keyed by an arbitrary id (user id).
 //
 // Token bucket semantics:
 //   - Capacity = `burst` (max tokens the bucket can hold).
 //   - Refill   = `perMinute / 60` tokens per second (lazy: tokens are added
 //                on `consume` based on elapsed time since the bucket was
 //                last touched).
 //   - Each accepted call consumes exactly one token. Calls that find the
 //     bucket empty are rejected and the caller gets back a Retry-After hint
 //     in milliseconds.
 //
 // Stale entries: the map only grows by the number of distinct ids seen.
 // For a 5-user MVP this is bounded. No automatic pruning is implemented
 // (there is no `pruneStale` on the limiter); larger installs can drop an
 // idle bucket with `reset(id)` or clear all buckets with `reset()`.
 import { envNumber } from "./env.js";
 /** Limiter settings: sustained rate in calls per minute and bucket capacity. */
 export type RateLimiterOptions = {
  perMinute: number;
  burst: number;
 };
 /** Outcome of one `consume` call: accepted with tokens left, or rejected with a wait hint. */
 export type ConsumeResult =
  | { ok: true; remaining: number }
  | { ok: false; retryAfterMs: number };
 /** Handle returned by `createRateLimiter`. */
 export type RateLimiter = {
  consume(id: string, now?: number): ConsumeResult;
  size: () => number;
  reset: (id?: string) => void;
 };
 /**
 * Creates an in-memory token-bucket limiter with one bucket per id.
 *
 * A bucket starts full (`burst` tokens) the first time its id is seen and is
 * topped up lazily on each `consume`, at `perMinute / 60_000` tokens per
 * elapsed millisecond, never exceeding `burst`.
 *
 * @param options `perMinute` refill rate and `burst` capacity.
 * @returns `consume` (takes one token or reports how long to wait), `size` (number of buckets) and `reset` (drop one bucket, or all when called without an id).
 */
 export const createRateLimiter = (
  options: RateLimiterOptions,
 ): RateLimiter => {
  const capacity = options.burst;
  const tokensPerMs = options.perMinute / 60_000;
  const state = new Map<string, { tokens: number; lastRefillMs: number }>();
  const take = (id: string, now: number = Date.now()): ConsumeResult => {
    const known = state.get(id);
    const bucket = known ?? { tokens: capacity, lastRefillMs: now };
    if (known === undefined) {
      state.set(id, bucket);
    } else {
      // A clock that moved backwards counts as zero elapsed time.
      const gained = Math.max(0, now - bucket.lastRefillMs) * tokensPerMs;
      if (gained > 0) {
        bucket.tokens = Math.min(capacity, bucket.tokens + gained);
        bucket.lastRefillMs = now;
      }
    }
    if (!(bucket.tokens >= 1)) {
      // Time until the bucket holds one whole token again.
      const deficit = 1 - bucket.tokens;
      const waitMs = tokensPerMs > 0 ? Math.ceil(deficit / tokensPerMs) : 60_000;
      return { ok: false, retryAfterMs: waitMs };
    }
    bucket.tokens -= 1;
    return { ok: true, remaining: Math.floor(bucket.tokens) };
  };
  return {
    consume: take,
    size: () => state.size,
    reset: (id) => {
      if (id !== undefined) {
        state.delete(id);
        return;
      }
      state.clear();
    },
  };
 };
 /**
 * Builds the chat limiter from `CHAT_RATE_LIMIT_PER_MINUTE` (20 passed to
 * `envNumber` as the fallback) and `CHAT_RATE_LIMIT_BURST` (fallback 5).
 * Both values are raised to at least 1.
 */
 export const chatRateLimiterFromEnv = (): RateLimiter =>
  createRateLimiter({
    perMinute: Math.max(1, envNumber(process.env.CHAT_RATE_LIMIT_PER_MINUTE, 20)),
    burst: Math.max(1, envNumber(process.env.CHAT_RATE_LIMIT_BURST, 5)),
  });
 /**
 * Builds the webhook limiter from `WEBHOOK_RATE_LIMIT_PER_MINUTE` (60 passed
 * to `envNumber` as the fallback) and `WEBHOOK_RATE_LIMIT_BURST` (fallback
 * 10). Both values are raised to at least 1.
 *
 * The defaults are generous on purpose: the goal is to stop a runaway loop,
 * not to throttle a real operator. Tighten per webhook_id in the future if a
 * specific hook becomes a hotspot.
 */
 export const webhookRateLimiterFromEnv = (): RateLimiter =>
  createRateLimiter({
    perMinute: Math.max(1, envNumber(process.env.WEBHOOK_RATE_LIMIT_PER_MINUTE, 60)),
    burst: Math.max(1, envNumber(process.env.WEBHOOK_RATE_LIMIT_BURST, 10)),
  });
--- a/apps/api/src/server.ts
+++ b/apps/api/src/server.ts
@@ -0,0 +1,170 @@
 import Fastify from "fastify";
 import cors from "@fastify/cors";
 import { ZodError } from "zod";
 import { config as loadDotenv } from "dotenv";
 import { dirname, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 // Load `.env` from the repo root regardless of the cwd the process was
 // started from. `import "dotenv/config"` would only look in `process.cwd()`,
 // which silently breaks when the API is started from a sub-directory.
 //
 // NOTE(review): static `import` declarations are hoisted in ES modules, so
 // every module this file imports (including the imports written after this
 // call) is evaluated BEFORE `loadDotenv` runs. Code that reads `process.env`
 // at module top level therefore does not see values that exist only in
 // `.env`. Confirm no imported module depends on that, or move this load
 // into a side-effect module that is imported first.
 const __dirnameApi = dirname(fileURLToPath(import.meta.url));
 loadDotenv({ path: resolve(__dirnameApi, "../../../.env") });
 import { registerAuthRoutes } from "./auth/routes.js";
 import { registerChatRoutes } from "./chat/routes.js";
 import { openDatabase } from "./db/database.js";
 import { migrate } from "./db/migrate.js";
 import { registerDocsRoutes } from "./docs/routes.js";
 import { envNumber } from "./env.js";
 import { registerMcpRoutes } from "./mcp/routes.js";
 import { registerModelRoutes } from "./models/routes.js";
 import { registerN8nRoutes } from "./n8n/routes.js";
 import { registerRagRoutes } from "./rag/routes.js";
 import { registerSessionRoutes } from "./sessions/routes.js";
 import { registerSkillRoutes } from "./skills/routes.js";
 import { runWebhookAuditPurge, webhookAuditPurgeConfigFromEnv } from "./webhooks/audit.js";
 import { registerWebhookRoutes } from "./webhooks/routes.js";
 import { observeHttp, renderPrometheusText, snapshotMetrics } from "./metrics.js";
 // Listen port and maximum request body size in bytes. Each is read through
 // `envNumber`, with 8787 and 1_048_576 passed as the fallback argument.
 const port = envNumber(process.env.API_PORT, 8787);
 const bodyLimit = envNumber(process.env.API_BODY_LIMIT_BYTES, 1_048_576);
 /**
 * Derives the CORS `origin` option from `CORS_ALLOWED_ORIGINS`.
 *
 * @returns `true` when the variable is unset or empty, or when any
 *   comma-separated entry is `*`; otherwise the list of trimmed, non-empty
 *   entries (which may be empty, e.g. for a value of ",").
 */
 const corsOrigin = () => {
  const raw = process.env.CORS_ALLOWED_ORIGINS;
  if (raw === undefined || raw === "") return true;
  const allowList: string[] = [];
  for (const entry of raw.split(",")) {
    const origin = entry.trim();
    // A wildcard anywhere in the list wins over the explicit entries.
    if (origin === "*") return true;
    if (origin.length > 0) allowList.push(origin);
  }
  return allowList;
 };
 // Fastify instance with its logger enabled and the configured body limit.
 const app = Fastify({
  logger: true,
  bodyLimit,
 });
 // Open the database and run `migrate` before any route is registered.
 const db = openDatabase();
 migrate(db);
 // CORS policy comes from `corsOrigin()`, i.e. from CORS_ALLOWED_ORIGINS.
 await app.register(cors, {
  origin: corsOrigin(),
 });
 // Observability: track every request by route + method + status with duration.
 // The route template (e.g. "/api/sessions/:id") is preferred over the raw URL
 // so `/api/sessions/abc` and `/api/sessions/def` aggregate into the same bucket.
 //
 // `onRequest` stamps the start time on the request object; `onResponse`
 // reads it back to compute the duration.
 app.addHook("onRequest", async (request) => {
  (request as { sic_startedAt?: number }).sic_startedAt = performance.now();
 });
 app.addHook("onResponse", async (request, reply) => {
  const route = request.routeOptions?.url ?? request.url ?? "unknown";
  const startedAt = (request as { sic_startedAt?: number }).sic_startedAt;
  observeHttp({
    route,
    method: request.method,
    status: reply.statusCode,
    // Report 0 rather than NaN if the start mark is missing, so a single
    // unstamped request cannot poison the duration metrics.
    durationMs: startedAt === undefined ? 0 : Math.round(performance.now() - startedAt),
    timestamp: Date.now(),
  });
 });
 // Adds a fixed set of hardening headers to every response; the payload is
 // passed through untouched.
 app.addHook("onSend", async (_request, reply, payload) => {
  const securityHeaders: ReadonlyArray<readonly [string, string]> = [
    ["x-content-type-options", "nosniff"],
    ["referrer-policy", "no-referrer"],
    ["x-frame-options", "DENY"],
  ];
  for (const [name, value] of securityHeaders) {
    reply.header(name, value);
  }
  return payload;
 });
 // Central error mapping for every route:
 //   - ZodError                               -> 400 validation_error + issues
 //   - message starting auth_ / JWT / JWKS    -> 401 unauthorized
 //   - error carrying a 4xx `statusCode`      -> that status, request_error
 //   - anything else                          -> logged, 500 internal_error
 app.setErrorHandler((error, _request, reply) => {
  if (error instanceof ZodError) {
    return reply.code(400).send({
      error: "validation_error",
      issues: error.issues,
    });
  }
  const message = error instanceof Error ? error.message : String(error);
  if (
    message.startsWith("auth_") ||
    message.startsWith("JWT") ||
    message.startsWith("JWKS")
  ) {
    return reply.code(401).send({ error: "unauthorized" });
  }
  // Errors raised by the framework for bad requests (malformed JSON body,
  // body larger than `bodyLimit`, unsupported media type, ...) carry a 4xx
  // `statusCode`. Pass that status through instead of reporting a server
  // fault, which would mislead clients and pollute the 5xx metrics.
  const details = (typeof error === "object" && error !== null ? error : {}) as {
    statusCode?: unknown;
    code?: unknown;
  };
  if (
    typeof details.statusCode === "number" &&
    details.statusCode >= 400 &&
    details.statusCode < 500
  ) {
    return reply.code(details.statusCode).send({
      error: "request_error",
      code: typeof details.code === "string" ? details.code : undefined,
      message,
    });
  }
  app.log.error(error);
  return reply.code(500).send({ error: "internal_error" });
 });
 // Liveness: answers without touching any dependency.
 app.get("/healthz", async () => ({ status: "ok" }));
 // Readiness: runs a trivial query first; if it throws, the error goes
 // through the error handler instead of returning "ready".
 app.get("/readyz", async () => {
  db.prepare("SELECT 1").get();
  return { status: "ready" };
 });
 // Static service name and version.
 app.get("/api/version", async () => ({
  name: "pi-chat-api",
  version: "0.1.0",
 }));
 // Observability surface. `/metrics` returns Prometheus text (scraper-friendly);
 // `/api/metrics` returns the same data as JSON for humans and the smoke test.
 app.get("/metrics", async (_request, reply) => {
  reply.header("content-type", "text/plain; version=0.0.4; charset=utf-8");
  return renderPrometheusText();
 });
 app.get("/api/metrics", async () => snapshotMetrics());
 // Feature routes, registered one after another. The session, webhook and
 // chat modules also receive the database handle; the others only get `app`.
 await registerSessionRoutes(app, db);
 await registerAuthRoutes(app);
 await registerDocsRoutes(app);
 await registerModelRoutes(app);
 await registerRagRoutes(app);
 await registerSkillRoutes(app);
 await registerN8nRoutes(app);
 await registerMcpRoutes(app);
 await registerWebhookRoutes(app, db);
 await registerChatRoutes(app, db);
 // Audit retention: run once on boot, then on a timer. Cheap, idempotent.
 // The boot run is not wrapped in try/catch, so an exception here propagates
 // out of module evaluation; failures of the timed runs are only logged.
 const auditPurgeConfig = webhookAuditPurgeConfigFromEnv();
 const initialPurge = runWebhookAuditPurge(db, auditPurgeConfig);
 // Log only when something was actually deleted.
 if (initialPurge.deletedByAge > 0 || initialPurge.deletedByCap > 0) {
  app.log.info(
    { ...initialPurge, config: auditPurgeConfig },
    "webhook audit purge (boot)",
  );
 }
 // Interval from WEBHOOK_AUDIT_PURGE_INTERVAL_MS (3_600_000 passed as the
 // fallback), never shorter than 60 seconds.
 const auditPurgeIntervalMs = Math.max(60_000, envNumber(process.env.WEBHOOK_AUDIT_PURGE_INTERVAL_MS, 3_600_000));
 const auditPurgeTimer = setInterval(() => {
  try {
    const report = runWebhookAuditPurge(db, auditPurgeConfig);
    if (report.deletedByAge > 0 || report.deletedByCap > 0) {
      app.log.info({ ...report, config: auditPurgeConfig }, "webhook audit purge (timer)");
    }
  } catch (error) {
    app.log.error({ err: error }, "webhook audit purge failed");
  }
 }, auditPurgeIntervalMs);
 // Do not let the purge timer alone keep the process alive.
 auditPurgeTimer.unref?.();
 // Graceful shutdown: stop the purge timer, close the HTTP server, then
 // release the database handle.
 let shutdownStarted = false;
 const shutdown = async () => {
  // SIGINT and SIGTERM can both arrive (or one can repeat); tear down once.
  if (shutdownStarted) return;
  shutdownStarted = true;
  app.log.info("shutdown requested");
  clearInterval(auditPurgeTimer);
  try {
    await app.close();
  } catch (error) {
    process.exitCode = 1;
    app.log.error({ err: error }, "failed to close HTTP server during shutdown");
  } finally {
    // Always release the database, even when closing the server failed.
    try {
      db.close();
    } catch (error) {
      process.exitCode = 1;
      app.log.error({ err: error }, "failed to close database during shutdown");
    }
  }
 };
 // `shutdown` handles its own errors, so the returned promise never rejects.
 process.on("SIGINT", () => void shutdown());
 process.on("SIGTERM", () => void shutdown());
 await app.listen({ port, host: "0.0.0.0" });
--- a/apps/api/src/sessions/repository.ts
+++ b/apps/api/src/sessions/repository.ts
@@ -0,0 +1,140 @@
 import { randomUUID } from "node:crypto";
 import type { AppDatabase } from "../db/database.js";
 // Row shape the session repository reads from `chat_sessions` with
 // `SELECT *`. `created_at` / `updated_at` are written by this module as
 // ISO strings from `Date#toISOString()`.
 export type ChatSessionRecord = {
  id: string;
  user_id: string;
  title: string | null;
  system_prompt: string | null;
  created_at: string;
  updated_at: string;
 };
 // Row shape the message repository reads from `chat_messages`.
 // `metadata` holds the `JSON.stringify` output of the value given at
 // creation time, or null when none was given.
 export type ChatMessageRecord = {
  id: string;
  session_id: string;
  user_id: string;
  role: "user" | "assistant" | "system" | "tool";
  content: string;
  metadata: string | null;
  created_at: string;
 };
 /**
 * Builds the data-access object for `chat_sessions`.
 *
 * Every statement filters on `user_id`, so a caller can only see or change
 * sessions that belong to the user id it passes in.
 *
 * @param db Database handle; a statement is prepared on each call.
 */
 export function createSessionRepository(db: AppDatabase) {
  const isoNow = (): string => new Date().toISOString();
  return {
    /** All sessions of the user, most recently updated first. */
    list(userId: string): ChatSessionRecord[] {
      const query = db.prepare("SELECT * FROM chat_sessions WHERE user_id = ? ORDER BY updated_at DESC");
      return query.all(userId) as ChatSessionRecord[];
    },
    /** Inserts a new session with a random UUID and returns the stored record. */
    create(userId: string, title: string | null): ChatSessionRecord {
      const stamp = isoNow();
      const session: ChatSessionRecord = {
        id: randomUUID(),
        user_id: userId,
        title,
        system_prompt: null,
        created_at: stamp,
        updated_at: stamp,
      };
      const insert = db.prepare(
        "INSERT INTO chat_sessions (id, user_id, title, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
      );
      insert.run(session.id, session.user_id, session.title, session.created_at, session.updated_at);
      return session;
    },
    /** One session by id, or null when it does not exist for this user. */
    get(userId: string, sessionId: string): ChatSessionRecord | null {
      const row = db
        .prepare("SELECT * FROM chat_sessions WHERE id = ? AND user_id = ?")
        .get(sessionId, userId) as ChatSessionRecord | undefined;
      return row ?? null;
    },
    /** Deletes one session; true when a row was removed. */
    delete(userId: string, sessionId: string): boolean {
      const removal = db.prepare("DELETE FROM chat_sessions WHERE id = ? AND user_id = ?");
      const outcome = removal.run(sessionId, userId);
      return outcome.changes > 0;
    },
    /** Deletes every session of the user and returns how many rows went away. */
    deleteAllForUser(userId: string): number {
      const removal = db.prepare("DELETE FROM chat_sessions WHERE user_id = ?");
      const outcome = removal.run(userId);
      return Number(outcome.changes ?? 0);
    },
    /** Bumps `updated_at` to the current time. */
    touch(userId: string, sessionId: string): void {
      const update = db.prepare("UPDATE chat_sessions SET updated_at = ? WHERE id = ? AND user_id = ?");
      update.run(isoNow(), sessionId, userId);
    },
    /** Sets the title and bumps `updated_at`. */
    updateTitle(userId: string, sessionId: string, title: string): void {
      const update = db.prepare(
        "UPDATE chat_sessions SET title = ?, updated_at = ? WHERE id = ? AND user_id = ?",
      );
      update.run(title, isoNow(), sessionId, userId);
    },
    /**
     * Stores the trimmed prompt, or NULL when it is null / empty /
     * whitespace-only; true when a row was updated.
     */
    updateSystemPrompt(userId: string, sessionId: string, prompt: string | null): boolean {
      const trimmed = prompt?.trim() ?? "";
      const normalized = trimmed.length > 0 ? trimmed : null;
      const update = db.prepare(
        "UPDATE chat_sessions SET system_prompt = ?, updated_at = ? WHERE id = ? AND user_id = ?",
      );
      const outcome = update.run(normalized, isoNow(), sessionId, userId);
      return outcome.changes > 0;
    },
  };
 }
 /**
 * Builds the data-access object for `chat_messages`.
 *
 * @param db Database handle; a statement is prepared on each call.
 */
 export function createMessageRepository(db: AppDatabase) {
  return {
    /** Messages of one session owned by the user, ordered by `created_at` ascending. */
    listForSession(userId: string, sessionId: string): ChatMessageRecord[] {
      const query = db.prepare(
        "SELECT * FROM chat_messages WHERE session_id = ? AND user_id = ? ORDER BY created_at ASC",
      );
      return query.all(sessionId, userId) as ChatMessageRecord[];
    },
    /**
     * Inserts a message with a random UUID and the current time, and returns
     * the stored record. Truthy `metadata` is serialized with
     * `JSON.stringify`; falsy metadata is stored as NULL.
     */
    create(input: {
      sessionId: string;
      userId: string;
      role: ChatMessageRecord["role"];
      content: string;
      metadata?: unknown;
    }): ChatMessageRecord {
      const { sessionId, userId, role, content, metadata } = input;
      const message: ChatMessageRecord = {
        id: randomUUID(),
        session_id: sessionId,
        user_id: userId,
        role,
        content,
        metadata: metadata ? JSON.stringify(metadata) : null,
        created_at: new Date().toISOString(),
      };
      const insert = db.prepare(
        `INSERT INTO chat_messages
          (id, session_id, user_id, role, content, metadata, created_at)
          VALUES (?, ?, ?, ?, ?, ?, ?)`,
      );
      // Bind values in the same order as the column list above.
      const row = [
        message.id,
        message.session_id,
        message.user_id,
        message.role,
        message.content,
        message.metadata,
        message.created_at,
      ];
      insert.run(...row);
      return message;
    },
  };
 }
--- a/apps/api/src/sessions/routes.ts
+++ b/apps/api/src/sessions/routes.ts
@@ -0,0 +1,172 @@
 import type { FastifyInstance } from "fastify";
 import { z } from "zod";
 import { getAuthUser } from "../auth/index.js";
 import type { AppDatabase } from "../db/database.js";
 import { createMessageRepository, createSessionRepository } from "./repository.js";
 // Body of POST /api/sessions: an optional title of 1-120 characters.
 const createSessionBody = z.object({
  title: z.string().min(1).max(120).optional(),
 });
 // Body of PATCH /api/sessions/:id: a required title, trimmed, 1-120 characters.
 const updateSessionBody = z.object({
  title: z.string().trim().min(1).max(120),
 });
 // Body of PATCH /api/sessions/:id/system-prompt.
 const updateSystemPromptBody = z.object({
  // At most 8,000 characters. `null`, an omitted field, and empty /
  // whitespace-only strings all clear the override: the handler forwards
  // `system_prompt ?? null` to `updateSystemPrompt`, which stores NULL for
  // blank input.
  system_prompt: z.string().max(8_000).nullable().optional(),
 });
 /**
 * Registers the session routes: list, create, read, rename, system-prompt
 * override, delete (single and bulk), export and import.
 *
 * Every handler resolves the caller with `getAuthUser` and passes `user.id`
 * to the repositories, so reads and writes are scoped to that user.
 *
 * @param app Fastify instance the routes are added to.
 * @param db Database handle used to build the session and message repositories.
 */
 export async function registerSessionRoutes(app: FastifyInstance, db: AppDatabase) {
  const sessions = createSessionRepository(db);
  const messages = createMessageRepository(db);
  // List the caller's sessions.
  app.get("/api/sessions", async (request) => {
    const user = await getAuthUser(request);
    return { items: sessions.list(user.id) };
  });
  // Create a session; a missing body is treated as `{}` (no title).
  app.post("/api/sessions", async (request, reply) => {
    const user = await getAuthUser(request);
    const body = createSessionBody.parse(request.body ?? {});
    const session = sessions.create(user.id, body.title ?? null);
    return reply.code(201).send(session);
  });
  // Read one session together with its messages.
  app.get<{ Params: { id: string } }>("/api/sessions/:id", async (request, reply) => {
    const user = await getAuthUser(request);
    const session = sessions.get(user.id, request.params.id);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    return {
      ...session,
      messages: messages.listForSession(user.id, session.id),
    };
  });
  // Rename a session and return the updated record.
  app.patch<{ Params: { id: string } }>("/api/sessions/:id", async (request, reply) => {
    const user = await getAuthUser(request);
    const body = updateSessionBody.parse(request.body);
    const session = sessions.get(user.id, request.params.id);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    sessions.updateTitle(user.id, session.id, body.title);
    return sessions.get(user.id, session.id);
  });
  // Per-session system prompt override. Inserted into the chat stream
  // immediately after the base identity prompt, before the docs/actions
  // context. Use to attach incident-specific context (runbook link, on-call
  // names, severity matrix) without polluting the global prompt.
  app.patch<{ Params: { id: string } }>(
    "/api/sessions/:id/system-prompt",
    async (request, reply) => {
      const user = await getAuthUser(request);
      const body = updateSystemPromptBody.parse(request.body ?? {});
      const session = sessions.get(user.id, request.params.id);
      if (!session) {
        return reply.code(404).send({ error: "session_not_found" });
      }
      sessions.updateSystemPrompt(user.id, session.id, body.system_prompt ?? null);
      return sessions.get(user.id, session.id);
    },
  );
  // Delete one session; 204 on success, 404 when nothing was removed.
  app.delete<{ Params: { id: string } }>("/api/sessions/:id", async (request, reply) => {
    const user = await getAuthUser(request);
    const deleted = sessions.delete(user.id, request.params.id);
    if (!deleted) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    return reply.code(204).send();
  });
  // Bulk delete: wipes every session owned by the current user. Cascade
  // removes the messages and webhook_runs that point at them. The frontend
  // requires the user to type the literal word "delete" before this fires.
  app.delete("/api/sessions", async (request, reply) => {
    const user = await getAuthUser(request);
    const removed = sessions.deleteAllForUser(user.id);
    return reply.code(200).send({ deleted: removed });
  });
  // Export: returns a JSON document with the session metadata and all its
  // messages. The shape is a stable contract so a `POST /api/sessions/import`
  // can read it back. webhook_runs are intentionally excluded from the
  // export — those are operational audit data, not conversation content.
  // Messages are emitted exactly as stored, so `metadata` is a JSON string
  // or null here.
  app.get<{ Params: { id: string } }>("/api/sessions/:id/export", async (request, reply) => {
    const user = await getAuthUser(request);
    const session = sessions.get(user.id, request.params.id);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    return {
      version: 1,
      exported_at: new Date().toISOString(),
      session: {
        id: session.id,
        title: session.title,
        created_at: session.created_at,
        updated_at: session.updated_at,
      },
      messages: messages.listForSession(user.id, session.id),
    };
  });
  // Import: accepts the export document above, creates a new session owned
  // by the caller, and writes the messages with fresh ids. Returns the new
  // session and a count of imported messages. Messages with role "tool" are
  // not accepted by this schema.
  const importSessionBody = z.object({
    session: z.object({
      title: z.string().max(120).nullable().optional(),
      created_at: z.string().optional(),
      updated_at: z.string().optional(),
    }),
    messages: z.array(
      z.object({
        role: z.enum(["user", "assistant", "system"]),
        content: z.string().min(1).max(50_000),
        // The export route emits `metadata` as stored: a JSON string or
        // null. Accept that shape as well as a plain object, otherwise an
        // exported document fails validation when it is imported back.
        metadata: z.union([z.record(z.unknown()), z.string()]).nullable().optional(),
        // Accepted so exported documents validate, but not applied:
        // imported messages are stamped with the import time.
        created_at: z.string().optional(),
      }),
    ),
  });
  // Converts imported metadata into the object form `messages.create`
  // serializes. Metadata is auxiliary, so a string that is not a JSON object
  // is dropped instead of failing the whole import.
  const toMetadataObject = (
    value: Record<string, unknown> | string | null | undefined,
  ): Record<string, unknown> | undefined => {
    if (value === null || value === undefined) return undefined;
    if (typeof value !== "string") return value;
    try {
      const parsed: unknown = JSON.parse(value);
      return parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)
        ? (parsed as Record<string, unknown>)
        : undefined;
    } catch {
      return undefined;
    }
  };
  app.post("/api/sessions/import", async (request, reply) => {
    const user = await getAuthUser(request);
    const body = importSessionBody.parse(request.body);
    const newSession = sessions.create(user.id, body.session.title ?? null);
    let imported = 0;
    for (const message of body.messages) {
      messages.create({
        sessionId: newSession.id,
        userId: user.id,
        role: message.role,
        content: message.content,
        metadata: toMetadataObject(message.metadata),
      });
      imported += 1;
    }
    sessions.touch(user.id, newSession.id);
    return reply.code(201).send({
      session: newSession,
      imported_messages: imported,
    });
  });
 }
--- a/apps/api/src/skills/config.ts
+++ b/apps/api/src/skills/config.ts
@@ -0,0 +1,57 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import { parse } from "yaml";
 import { envString } from "../env.js";
 // One skill entry after loading: every text field is a trimmed string and
 // `enabled` is a boolean (see `loadSkillDefinitions`).
 export type SkillDefinition = {
  id: string;
  name: string;
  description: string;
  enabled: boolean;
  prompt: string;
 };
 // Skill as returned by `toPublicSkill`: everything except the prompt text.
 export type PublicSkillDefinition = Omit<SkillDefinition, "prompt">;
 // Expected top-level shape of the skills YAML file.
 type SkillsFile = { skills?: SkillDefinition[] };
 // Location of skills.yml. `SKILLS_CONFIG_PATH` is handed to `envString` so
 // tests / docker setups can override; the fallback assumes the API was
 // started from apps/api, where the config dir is at ../../config.
 const defaultPath = (): string => {
  const fromCwd = resolve(process.cwd(), "../../config/skills.yml");
  return envString(process.env.SKILLS_CONFIG_PATH, fromCwd);
 };
 /**
 * Loads the skill catalog from YAML.
 *
 * Entries that are not objects are skipped, text fields are stringified and
 * trimmed, `enabled` is coerced with `Boolean`, and entries whose id ends up
 * empty are dropped.
 *
 * @param configPath File to read; defaults to `defaultPath()`.
 * @returns The normalized skills, or `[]` when the file is missing or has no `skills` array.
 * @throws Any read or parse error other than ENOENT.
 */
 export const loadSkillDefinitions = (
  configPath: string = defaultPath(),
 ): SkillDefinition[] => {
  try {
    const document = parse(readFileSync(configPath, "utf8")) as SkillsFile;
    if (!document || !Array.isArray(document.skills)) return [];
    const definitions: SkillDefinition[] = [];
    for (const entry of document.skills) {
      if (!entry || typeof entry !== "object") continue;
      const definition: SkillDefinition = {
        id: String(entry.id ?? "").trim(),
        name: String(entry.name ?? "").trim(),
        description: String(entry.description ?? "").trim(),
        enabled: Boolean(entry.enabled),
        prompt: String(entry.prompt ?? "").trim(),
      };
      if (definition.id.length > 0) definitions.push(definition);
    }
    return definitions;
  } catch (error) {
    // Config is optional: missing file is fine, malformed file should surface.
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
    return [];
  }
 };
 /**
 * Returns the prompt text of every skill that is enabled and has a
 * non-empty prompt, in catalog order.
 *
 * @param skills Skills to inspect; defaults to `loadSkillDefinitions()`.
 */
 export const getEnabledSkillPrompts = (skills: SkillDefinition[] = loadSkillDefinitions()): string[] => {
  const prompts: string[] = [];
  for (const { enabled, prompt } of skills) {
    if (enabled && prompt.length > 0) prompts.push(prompt);
  }
  return prompts;
 };
 /** Projects a skill onto its public fields (id, name, description, enabled), leaving the prompt out. */
 export const toPublicSkill = (skill: SkillDefinition): PublicSkillDefinition => {
  const { id, name, description, enabled } = skill;
  return { id, name, description, enabled };
 };
--- a/apps/api/src/skills/routes.ts
+++ b/apps/api/src/skills/routes.ts
@@ -0,0 +1,9 @@
 import type { FastifyInstance } from "fastify";
 import { loadSkillDefinitions, toPublicSkill } from "./config.js";
 /**
 * Registers `GET /api/skills`, which reloads the skill catalog on each
 * request and returns the public view of every entry under `items`.
 */
 export const registerSkillRoutes = async (app: FastifyInstance) => {
  app.get("/api/skills", async () => ({
    items: loadSkillDefinitions().map((skill) => toPublicSkill(skill)),
  }));
 };
--- a/apps/api/src/webhooks/audit.ts
+++ b/apps/api/src/webhooks/audit.ts
@@ -0,0 +1,162 @@
 import { randomUUID } from "node:crypto";
 import type { AppDatabase } from "../db/database.js";
 import { envNumber } from "../env.js";
 // Outcome recorded for one webhook execution.
 export type WebhookRunStatus = "success" | "error";
 // Row shape of `webhook_runs` as selected by the audit repository.
 // `request_payload` holds the `JSON.stringify` output of the payload given
 // at creation time, or null when none was given.
 export type WebhookRunRecord = {
  id: string;
  webhook_id: string;
  user_id: string;
  session_id: string;
  status: WebhookRunStatus;
  request_payload: string | null;
  response_status: number | null;
  attempts: number;
  created_at: string;
 };
 /**
 * Builds the data-access object for the `webhook_runs` audit table.
 *
 * @param db Database handle; a statement is prepared on each call.
 */
 export const createWebhookAuditRepository = (db: AppDatabase) => ({
  /**
   * Inserts one audit row and returns it. Defaults: `attempts` 1,
   * `response_status` null, `created_at` the current time; a truthy
   * `requestPayload` is stored as JSON, otherwise NULL.
   */
  create(input: {
    webhookId: string;
    userId: string;
    sessionId: string;
    status: WebhookRunStatus;
    requestPayload?: unknown;
    responseStatus?: number | null;
    attempts?: number;
    createdAt?: string;
  }) {
    const { webhookId, userId, sessionId, status, requestPayload } = input;
    const run = {
      id: randomUUID(),
      webhook_id: webhookId,
      user_id: userId,
      session_id: sessionId,
      status,
      request_payload: requestPayload ? JSON.stringify(requestPayload) : null,
      response_status: input.responseStatus ?? null,
      attempts: input.attempts ?? 1,
      created_at: input.createdAt ?? new Date().toISOString(),
    };
    // Named parameters (@id, ...) are bound from the keys of `run`.
    const insert = db.prepare(
      `INSERT INTO webhook_runs (id, webhook_id, user_id, session_id, status, request_payload, response_status, attempts, created_at)
       VALUES (@id, @webhook_id, @user_id, @session_id, @status, @request_payload, @response_status, @attempts, @created_at)`,
    );
    insert.run(run);
    return run;
  },
  /** Most recent runs of one session owned by the user, newest first, at most `limit` rows. */
  listForSession(userId: string, sessionId: string, limit = 20) {
    const query = db.prepare(
        `SELECT id, webhook_id, user_id, session_id, status, request_payload, response_status, attempts, created_at
         FROM webhook_runs
         WHERE session_id = ? AND user_id = ?
         ORDER BY created_at DESC
         LIMIT ?`,
    );
    return query.all(sessionId, userId, limit) as WebhookRunRecord[];
  },
  /** Deletes rows whose `created_at` is before the cutoff; returns the number removed. */
  purgeOlderThan(isoCutoff: string): number {
    const removal = db.prepare(`DELETE FROM webhook_runs WHERE created_at < ?`);
    const outcome = removal.run(isoCutoff);
    return Number(outcome.changes ?? 0);
  },
  /** Keeps the newest `maxPerUser` rows per user and deletes the rest; a cap <= 0 is a no-op. */
  enforcePerUserCap(maxPerUser: number): number {
    if (maxPerUser <= 0) return 0;
    // Uses SQLite window function (3.25+) to keep the most recent N rows
    // per user and delete the rest. user_id is always present in the table.
    const removal = db.prepare(
        `DELETE FROM webhook_runs
         WHERE rowid IN (
           SELECT rowid FROM (
             SELECT rowid,
                    ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC, rowid DESC) AS rn
             FROM webhook_runs
           )
           WHERE rn > ?
         )`,
    );
    const outcome = removal.run(maxPerUser);
    return Number(outcome.changes ?? 0);
  },
  /**
   * Per-webhook usage stats for a single user over a recent time window.
   * `isoSince` should be a UTC ISO string (e.g. now - 7 days).
   * Returns a map of webhook_id -> { runs, successes, successRate }.
   */
  usageForUserSince(isoSince: string, userId: string): Record<
    string,
    { runs: number; successes: number; successRate: number }
  > {
    const query = db.prepare(
        `SELECT webhook_id,
                COUNT(*) AS runs,
                SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) AS successes
         FROM webhook_runs
         WHERE user_id = ? AND created_at >= ?
         GROUP BY webhook_id`,
    );
    const rows = query.all(userId, isoSince) as Array<{
      webhook_id: string;
      runs: number;
      successes: number | null;
    }>;
    return rows.reduce<Record<string, { runs: number; successes: number; successRate: number }>>(
      (stats, row) => {
        const runs = Number(row.runs ?? 0);
        const successes = Number(row.successes ?? 0);
        // Guard the division: a row with zero runs reports a rate of 0.
        stats[row.webhook_id] = { runs, successes, successRate: runs > 0 ? successes / runs : 0 };
        return stats;
      },
      {},
    );
  },
 });
 /** Retention settings for the audit table; a value of 0 turns the matching purge pass off. */
 export type WebhookAuditPurgeConfig = {
  retentionDays: number;
  maxPerUser: number;
 };
 /**
 * Reads the purge settings from `WEBHOOK_RUNS_RETENTION_DAYS` (30 passed to
 * `envNumber` as the fallback) and `WEBHOOK_RUNS_MAX_PER_USER` (fallback
 * 1000). Negative values are raised to 0.
 */
 export const webhookAuditPurgeConfigFromEnv = (): WebhookAuditPurgeConfig => ({
  retentionDays: Math.max(0, envNumber(process.env.WEBHOOK_RUNS_RETENTION_DAYS, 30)),
  maxPerUser: Math.max(0, envNumber(process.env.WEBHOOK_RUNS_MAX_PER_USER, 1000)),
 });
 /** Result of one purge run; `cutoff` is null when the age pass was skipped. */
 export type WebhookAuditPurgeReport = {
  deletedByAge: number;
  deletedByCap: number;
  cutoff: string | null;
 };
 /**
 * Applies both retention passes to the audit table: first rows older than
 * `retentionDays`, then rows beyond `maxPerUser` per user. A pass whose
 * setting is not greater than 0 is skipped.
 *
 * @param db Database handle.
 * @param config Retention settings; defaults to `webhookAuditPurgeConfigFromEnv()`.
 * @returns Counts of deleted rows per pass and the ISO cutoff that was used.
 */
 export const runWebhookAuditPurge = (
  db: AppDatabase,
  config: WebhookAuditPurgeConfig = webhookAuditPurgeConfigFromEnv(),
 ): WebhookAuditPurgeReport => {
  const repository = createWebhookAuditRepository(db);
  const report: WebhookAuditPurgeReport = { deletedByAge: 0, deletedByCap: 0, cutoff: null };
  if (config.retentionDays > 0) {
    // 86_400_000 ms = one day.
    const retentionMs = config.retentionDays * 86_400_000;
    report.cutoff = new Date(Date.now() - retentionMs).toISOString();
    report.deletedByAge = repository.purgeOlderThan(report.cutoff);
  }
  if (config.maxPerUser > 0) {
    report.deletedByCap = repository.enforcePerUserCap(config.maxPerUser);
  }
  return report;
 };
--- a/apps/api/src/webhooks/config.ts
+++ b/apps/api/src/webhooks/config.ts
@@ -0,0 +1,53 @@
 import { readFileSync } from "node:fs";
 import { resolve } from "node:path";
 import YAML from "yaml";
 import { envString } from "../env.js";
 // HTTP methods a webhook definition is typed to use.
 export type WebhookMethod = "GET" | "POST" | "PUT" | "PATCH" | "DELETE";
 // One catalog entry as returned by `loadWebhookDefinitions`: the method is
 // upper-cased, env placeholders in `url` are expanded, `required_roles`
 // defaults to [] and `confirmation_required` defaults to true.
 export type WebhookDefinition = {
  id: string;
  label: string;
  description?: string;
  method: WebhookMethod;
  url: string;
  required_roles: string[];
  confirmation_required: boolean;
  payload_template?: unknown;
 };
 // Client-facing view of a webhook: the target URL and payload template are left out.
 export type PublicWebhookDefinition = Omit<WebhookDefinition, "url" | "payload_template">;
 /** Default catalog location: `../../config/webhooks.yml` resolved against the current working directory. */
 const defaultConfigPath = () => {
  const relativeToCwd = "../../config/webhooks.yml";
  return resolve(process.cwd(), relativeToCwd);
 };
 /**
 * Replaces every `${NAME}` placeholder (NAME = A-Z, 0-9, `_`) with the value
 * of that environment variable, or "" when it is unset.
 */
 const expandEnv = (value: string) => {
  const placeholder = /\$\{([A-Z0-9_]+)\}/g;
  return value.replace(placeholder, (_whole, variable: string) => {
    const resolved = process.env[variable];
    return resolved ?? "";
  });
 };
 /**
 * Loads the webhook catalog from YAML.
 *
 * Each entry keeps its own fields, with the method upper-cased, env
 * placeholders in `url` expanded, `required_roles` defaulting to [] and
 * `confirmation_required` defaulting to true.
 *
 * @param configPath File to read; defaults to a non-empty `WEBHOOKS_CONFIG_PATH`, else `defaultConfigPath()`.
 * @returns The normalized entries, or `[]` when the file has no `webhooks` key.
 * @throws Read and parse errors are not caught here (a missing file included).
 */
 export const loadWebhookDefinitions = (
  configPath = process.env.WEBHOOKS_CONFIG_PATH && process.env.WEBHOOKS_CONFIG_PATH.length > 0
    ? process.env.WEBHOOKS_CONFIG_PATH
    : defaultConfigPath(),
 ): WebhookDefinition[] => {
  const document = YAML.parse(readFileSync(configPath, "utf8")) as
    | { webhooks?: WebhookDefinition[] }
    | undefined;
  const entries = document?.webhooks ?? [];
  return entries.map((entry) => {
    const method = entry.method.toUpperCase() as WebhookMethod;
    const url = expandEnv(entry.url);
    return {
      ...entry,
      method,
      url,
      required_roles: entry.required_roles ?? [],
      confirmation_required: entry.confirmation_required ?? true,
    };
  });
 };
 /**
 * Tells whether the user holds every role the webhook requires. A webhook
 * with no required roles is usable by anyone.
 */
 export const canUseWebhook = (userRoles: string[], webhook: WebhookDefinition) => {
  const missingRoles = webhook.required_roles.filter((role) => !userRoles.includes(role));
  return missingRoles.length === 0;
 };
 /** Projects a webhook onto the fields safe to send to clients; `url` and `payload_template` are left out. */
 export const toPublicWebhook = (webhook: WebhookDefinition): PublicWebhookDefinition => {
  const { id, label, description, method, required_roles, confirmation_required } = webhook;
  return { id, label, description, method, required_roles, confirmation_required };
 };
--- a/apps/api/src/webhooks/routes.ts
+++ b/apps/api/src/webhooks/routes.ts
@@ -0,0 +1,288 @@
 import type { AuthUser } from "@pi-chat/shared";
 import type { FastifyInstance } from "fastify";
 import { z } from "zod";
 import type { AppDatabase } from "../db/database.js";
 import { getAuthUser } from "../auth/index.js";
 import { webhookRateLimiterFromEnv } from "../rate-limit.js";
 import { createSessionRepository } from "../sessions/repository.js";
 import { createWebhookAuditRepository } from "./audit.js";
 import { canUseWebhook, loadWebhookDefinitions, toPublicWebhook } from "./config.js";
 import { envNumber } from "../env.js";
 // Request body schema for POST /api/webhooks/:id/run.
 const runWebhookBody = z.object({
  sessionId: z.string().min(1),
  // Only the literal `true` validates, so every run request must carry an explicit confirmation.
  confirmed: z.literal(true),
  // The length cap is evaluated once, when this module loads. 8_000 is passed as
  // envNumber's second argument — presumably the fallback when CHAT_MESSAGE_MAX_CHARS
  // is unset (TODO confirm against ../env.js).
  lastUserMessage: z.string().max(envNumber(process.env.CHAT_MESSAGE_MAX_CHARS, 8_000)).default(""),
  // Free-form caller fields; buildPayload spreads them over the rendered template.
  payload: z.record(z.unknown()).default({}),
 });
 // Query-string schema for GET /api/webhook-runs.
 const webhookRunsQuery = z.object({
  sessionId: z.string().min(1),
  // Coerced from the query string; an integer from 1 to 50, defaulting to 20.
  limit: z.coerce.number().int().min(1).max(50).default(20),
 });
 /**
  * Recursively renders `{{dotted.path}}` placeholders inside `template`.
  *
  * - Strings: each placeholder is replaced by the value found by walking
  *   `context` along the dotted path. Missing, `null` and `undefined` values
  *   render as "". Any other value is converted with `String()`.
  * - Arrays and plain objects: rendered element by element / value by value.
  *   Object keys are never rendered.
  * - Anything else (numbers, booleans, null, ...) is returned unchanged.
  *
  * Path lookup only follows OWN properties. Inherited members such as
  * `constructor` or `toString` resolve to "" instead of leaking function
  * source into an outgoing payload.
  *
  * @param template Value to render; it is not mutated.
  * @param context Root object that placeholder paths are resolved against.
  * @returns A rendered copy of `template`.
  */
 const renderTemplate = (template: unknown, context: Record<string, unknown>): unknown => {
  if (typeof template === "string") {
    return template.replace(/\{\{([a-zA-Z0-9_.]+)\}\}/g, (_match, path: string) => {
      const value = path.split(".").reduce<unknown>((current, key) => {
        if (
          current !== null &&
          typeof current === "object" &&
          Object.prototype.hasOwnProperty.call(current, key)
        ) {
          return (current as Record<string, unknown>)[key];
        }
        // Once a segment is missing, "" is a non-object, so later segments also yield "".
        return "";
      }, context);
      return value == null ? "" : String(value);
    });
  }
  if (Array.isArray(template)) {
    return template.map((item) => renderTemplate(item, context));
  }
  if (template && typeof template === "object") {
    return Object.fromEntries(
      Object.entries(template).map(([key, value]) => [key, renderTemplate(value, context)]),
    );
  }
  return template;
 };
 /**
  * Builds the body sent to the webhook target: the rendered template (only
  * when it renders to a non-array object) with the caller-supplied `payload`
  * fields spread on top, so caller keys win on conflict.
  *
  * The template context exposes `user`, `session.id` and `chat.last_user_message`.
  */
 const buildPayload = (template: unknown, input: z.infer<typeof runWebhookBody>, user: AuthUser) => {
  const context = {
    user,
    session: { id: input.sessionId },
    chat: { last_user_message: input.lastUserMessage },
  };
  const rendered = renderTemplate(template ?? {}, context);
  // A template that renders to a scalar or an array contributes no fields.
  const base = rendered !== null && typeof rendered === "object" && !Array.isArray(rendered) ? rendered : {};
  return { ...base, ...input.payload };
 };
 /**
  * `fetch` with a deadline: the request is aborted if it has not settled
  * within `timeoutMs`. The timer is cleared as soon as the fetch promise
  * settles, whether it resolved or rejected.
  */
 const fetchWithTimeout = async (url: string, init: RequestInit, timeoutMs: number) => {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetch(url, { ...init, signal: controller.signal });
  } finally {
    clearTimeout(timer);
  }
 };
 /** Resolves, with no value, after `ms` milliseconds. */
 const sleep = (ms: number) =>
  new Promise<void>((wake) => {
    setTimeout(() => wake(), ms);
  });
 /** A status is worth retrying when it is 429 or 500 and above. */
 const isRetryableStatus = (status: number) => {
  if (status === 429) {
    return true;
  }
  return status >= 500;
 };
 /** Knobs consumed by `runWithRetry`. */
 type RetryPolicy = {
  /** Upper bound on fetch attempts, the first one included. */
  maxAttempts: number;
  /** Delay before the second attempt; it doubles for each further attempt. */
  initialBackoffMs: number;
  /** Ceiling applied to the doubled delay. */
  maxBackoffMs: number;
  /** Per-attempt timeout handed to `fetchWithTimeout`. */
  timeoutMs: number;
 };
 /**
  * Reads the retry policy from the WEBHOOK_* environment variables and clamps
  * it: at least one attempt, a non-negative initial backoff, a maximum backoff
  * no smaller than the initial one, and a timeout of at least 1 ms.
  */
 const retryPolicyFromEnv = (): RetryPolicy => {
  const { env } = process;
  const attemptsSetting = envNumber(env.WEBHOOK_RETRY_MAX_ATTEMPTS, 3);
  const initialSetting = envNumber(env.WEBHOOK_RETRY_INITIAL_BACKOFF_MS, 500);
  const initialBackoffMs = Math.max(0, initialSetting);
  const ceilingSetting = envNumber(env.WEBHOOK_RETRY_MAX_BACKOFF_MS, 5_000);
  const timeoutSetting = envNumber(env.WEBHOOK_TIMEOUT_MS, 15_000);
  return {
    maxAttempts: Math.max(1, attemptsSetting),
    initialBackoffMs,
    maxBackoffMs: Math.max(initialBackoffMs, ceilingSetting),
    timeoutMs: Math.max(1, timeoutSetting),
  };
 };
 /** Result of `runWithRetry`. */
 type RunOutcome = {
  /** Response of the last attempt, or null when that attempt threw instead of responding. */
  response: Response | null;
  /** Attempt count reported by `runWithRetry`. */
  attempts: number;
  /** Error caught from a failed attempt, or null. */
  lastError: unknown;
 };
 /**
  * Calls `url` up to `policy.maxAttempts` times with exponential backoff.
  *
  * An attempt is final, and returned immediately, when the response is OK or
  * carries a non-retryable status (4xx other than 429). Thrown errors
  * (network failure, timeout abort) and retryable statuses (429, 5xx) lead to
  * another attempt after `initialBackoffMs * 2^(attempt-1)` ms, capped at
  * `maxBackoffMs`.
  *
  * The returned outcome always describes the LAST attempt:
  * - `response` is null only when that attempt threw;
  * - `lastError` is non-null only when that attempt threw;
  * - `attempts` is the number of attempts actually made.
  *
  * @param url Target URL.
  * @param init Fetch options, reused unchanged for every attempt.
  * @param policy Attempt limit, backoff bounds and per-attempt timeout.
  */
 const runWithRetry = async (url: string, init: RequestInit, policy: RetryPolicy): Promise<RunOutcome> => {
  let lastError: unknown = null;
  let response: Response | null = null;
  let attempts = 0;
  for (let attempt = 1; attempt <= policy.maxAttempts; attempt++) {
    attempts = attempt;
    try {
      response = await fetchWithTimeout(url, init, policy.timeoutMs);
      // A response arrived, so an error from an earlier attempt no longer
      // describes the outcome and must not be reported with it.
      lastError = null;
      if (response.ok || !isRetryableStatus(response.status)) {
        return { response, attempts, lastError: null };
      }
    } catch (error) {
      lastError = error;
      response = null;
    }
    if (attempt < policy.maxAttempts) {
      if (response) {
        // This response is about to be discarded. Cancel its unread body so
        // the underlying connection is released before the next attempt.
        try {
          await response.body?.cancel();
        } catch {
          // Best effort: failing to cancel must not abort the retry loop.
        }
      }
      const backoff = Math.min(
        policy.maxBackoffMs,
        policy.initialBackoffMs * 2 ** (attempt - 1),
      );
      await sleep(backoff);
    }
  }
  return { response, attempts, lastError };
 };
 /**
  * Registers the webhook routes on `app`.
  *
  * - `GET /api/webhooks`: catalog entries the caller's roles allow, in public
  *   form (no `url`, no `payload_template`).
  * - `GET /api/webhooks/usage`: per-webhook run and success counts for the
  *   caller over the last `days` (1 to 365, default 7).
  * - `GET /api/webhook-runs`: audit rows for one of the caller's sessions.
  * - `POST /api/webhooks/:id/run`: runs a webhook from the server, with
  *   retries, and records the run in the audit table.
  *
  * The catalog is re-read from disk on each request that needs it.
  *
  * @param app Fastify instance to attach the routes to.
  * @param db Database handle used for the session and audit repositories.
  */
 export const registerWebhookRoutes = async (app: FastifyInstance, db: AppDatabase) => {
  const sessions = createSessionRepository(db);
  const audit = createWebhookAuditRepository(db);
  const webhookRateLimiter = webhookRateLimiterFromEnv();
  app.get("/api/webhooks", async (request) => {
    const user = await getAuthUser(request);
    const items = loadWebhookDefinitions()
      .filter((webhook) => canUseWebhook(user.roles, webhook))
      .map(toPublicWebhook);
    return { items };
  });
  app.get("/api/webhooks/usage", async (request) => {
    const user = await getAuthUser(request);
    const query = z
      .object({ days: z.coerce.number().int().min(1).max(365).default(7) })
      .parse(request.query ?? {});
    const since = new Date(Date.now() - query.days * 86_400_000).toISOString();
    const usage = audit.usageForUserSince(since, user.id);
    const items = Object.entries(usage).map(([webhook_id, stats]) => ({
      webhook_id,
      runs: stats.runs,
      successes: stats.successes,
      success_rate: stats.successRate,
      window_days: query.days,
    }));
    return { window_days: query.days, items };
  });
  app.get("/api/webhook-runs", async (request, reply) => {
    const user = await getAuthUser(request);
    const query = webhookRunsQuery.parse(request.query);
    // The session lookup is scoped to the caller; unknown and foreign sessions both 404.
    const session = sessions.get(user.id, query.sessionId);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    const items = audit.listForSession(user.id, query.sessionId, query.limit).map((run) => ({
      id: run.id,
      webhook_id: run.webhook_id,
      session_id: run.session_id,
      status: run.status,
      response_status: run.response_status,
      attempts: run.attempts,
      created_at: run.created_at,
    }));
    return { items };
  });
  app.post("/api/webhooks/:id/run", async (request, reply) => {
    const user = await getAuthUser(request);
    const params = z.object({ id: z.string().min(1) }).parse(request.params);
    const body = runWebhookBody.parse(request.body);
    const webhook = loadWebhookDefinitions().find((item) => item.id === params.id);
    // A webhook the caller may not use is reported exactly like a missing one.
    if (!webhook || !canUseWebhook(user.roles, webhook)) {
      return reply.code(404).send({ error: "webhook_not_found" });
    }
    // Per-webhook abuse detection: each webhook_id has its own bucket so a
    // runaway loop on one hook doesn't starve the rest. The bucket is shared
    // across all users on purpose — that's the abuse signal.
    const decision = webhookRateLimiter.consume(webhook.id);
    if (!decision.ok) {
      const retryAfterSec = Math.max(1, Math.ceil(decision.retryAfterMs / 1000));
      app.log.warn(
        { webhook: webhook.id, user: user.id, retryAfterSec },
        "webhook rate limit exceeded",
      );
      return reply
        .code(429)
        .header("retry-after", String(retryAfterSec))
        .header("x-ratelimit-remaining", "0")
        .send({
          error: "rate_limited",
          retry_after_ms: decision.retryAfterMs,
        });
    }
    reply.header("x-ratelimit-remaining", String(decision.remaining));
    const session = sessions.get(user.id, body.sessionId);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    const requestPayload = buildPayload(webhook.payload_template, body, user);
    if (!webhook.url) {
      // An empty URL means the entry is not configured; audit it as a zero-attempt error.
      audit.create({
        webhookId: webhook.id,
        userId: user.id,
        sessionId: body.sessionId,
        status: "error",
        requestPayload,
        attempts: 0,
      });
      return reply.code(500).send({ error: "webhook_not_configured" });
    }
    const policy = retryPolicyFromEnv();
    const outcome = await runWithRetry(
      webhook.url,
      {
        method: webhook.method,
        headers: { "content-type": "application/json" },
        // GET requests carry no body; the payload is still recorded in the audit row.
        body: webhook.method === "GET" ? undefined : JSON.stringify(requestPayload),
      },
      policy,
    );
    const response = outcome.response;
    const httpOk = response?.ok ?? false;
    // Classify by the final outcome: no response at all is a transport
    // failure, while a response obtained after several attempts was "retried".
    if (!response) {
      app.log.error({ err: outcome.lastError, webhook: webhook.id, attempts: outcome.attempts }, "webhook request failed after retries");
    } else if (outcome.attempts > 1) {
      app.log.warn(
        { webhook: webhook.id, attempts: outcome.attempts, status: response.status },
        "webhook request retried",
      );
    }
    const run = audit.create({
      webhookId: webhook.id,
      userId: user.id,
      sessionId: body.sessionId,
      status: httpOk ? "success" : "error",
      requestPayload,
      responseStatus: response?.status ?? null,
      attempts: outcome.attempts,
    });
    if (response) {
      // Only the status is used. Cancel the unread body so the connection is
      // released promptly instead of lingering until garbage collection.
      try {
        await response.body?.cancel();
      } catch {
        // Best effort: the run is already recorded.
      }
    }
    const summary = {
      id: run.id,
      webhook_id: run.webhook_id,
      status: run.status,
      response_status: run.response_status,
      attempts: run.attempts,
    };
    if (!response) {
      return reply.code(502).send({ ...summary, error: "webhook_request_failed" });
    }
    // Any non-OK downstream status is reported to the client as 502.
    return reply.code(httpOk ? 200 : 502).send(summary);
  });
 };
--- a/apps/api/test/integration.test.ts
+++ b/apps/api/test/integration.test.ts
@@ -0,0 +1,151 @@
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
 import { existsSync, mkdirSync, rmSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import Database from "better-sqlite3";
 import type { AppDatabase } from "../src/db/database.js";
 import { migrate } from "../src/db/migrate.js";
 import { createSessionRepository, createMessageRepository } from "../src/sessions/repository.js";
 import { createWebhookAuditRepository } from "../src/webhooks/audit.js";
 import { runWebhookAuditPurge } from "../src/webhooks/audit.js";
 let db: AppDatabase;
 let dbPath: string;
 // Every test runs against a freshly migrated SQLite file that lives in its
 // own uniquely named directory under the OS temp dir.
 beforeEach(() => {
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const workDir = join(tmpdir(), `sic-test-${uniqueSuffix}`);
  mkdirSync(workDir, { recursive: true });
  dbPath = join(workDir, "test.db");
  db = new Database(dbPath);
  db.pragma("foreign_keys = ON");
  migrate(db);
 });
 // Closes the database, then removes the whole per-test directory. Deleting
 // only the db file would leave one empty `sic-test-*` directory (plus any
 // SQLite side files) behind in the temp dir for every test.
 afterEach(() => {
  db.close();
  // dbPath is `<per-test dir>/test.db`, so its parent is the directory to drop.
  const workDir = join(dbPath, "..");
  if (existsSync(workDir)) rmSync(workDir, { recursive: true, force: true });
 });
 // Repository-level checks that one user's sessions and messages are never
 // visible to another user, and that deleting a session removes its messages.
 describe("session isolation", () => {
  it("never returns sessions/messages from another user", () => {
    const sessions = createSessionRepository(db);
    const messages = createMessageRepository(db);
    const a = sessions.create("user-a", "A session");
    const b = sessions.create("user-b", "B session");
    messages.create({ sessionId: a.id, userId: "user-a", role: "user", content: "a msg" });
    messages.create({ sessionId: b.id, userId: "user-b", role: "user", content: "b msg" });
    // List filter
    expect(sessions.list("user-a").map((s) => s.id)).toEqual([a.id]);
    expect(sessions.list("user-b").map((s) => s.id)).toEqual([b.id]);
    // get() requires matching user_id
    expect(sessions.get("user-a", b.id)).toBeFalsy();
    expect(sessions.get("user-b", a.id)).toBeFalsy();
    // Messages filter by both session_id and user_id
    const bMessages = messages.listForSession("user-a", b.id);
    expect(bMessages).toEqual([]);
    const aMessages = messages.listForSession("user-a", a.id);
    expect(aMessages).toHaveLength(1);
    expect(aMessages[0]?.content).toBe("a msg");
  });
  it("delete cascades to messages", () => {
    const sessions = createSessionRepository(db);
    const messages = createMessageRepository(db);
    const s = sessions.create("user-a", null);
    const m = messages.create({
      sessionId: s.id,
      userId: "user-a",
      role: "user",
      content: "will be cascaded",
    });
    sessions.delete("user-a", s.id);
    expect(messages.listForSession("user-a", s.id)).toEqual([]);
    // Direct DB check that the message row is gone (not just hidden)
    const row = db.prepare("SELECT id FROM chat_messages WHERE id = ?").get(m.id);
    expect(row).toBeUndefined();
  });
  // NOTE(review): despite its name, this case only exercises the owner path —
  // no update is ever attempted as a different user. Consider adding that
  // assertion once updateTitle's behavior for a non-owner is confirmed.
  it("updateTitle only affects the owner's session", () => {
    const sessions = createSessionRepository(db);
    const a = sessions.create("user-a", "Old");
    sessions.updateTitle("user-a", a.id, "New");
    expect(sessions.get("user-a", a.id)?.title).toBe("New");
  });
 });
 // Covers the webhook audit repository (per-user usage aggregation, per-session
 // listing) and the purge job (age-based deletion, per-user row cap).
 describe("webhook audit + retention", () => {
  it("usageForUserSince aggregates per webhook", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const s1 = sessions.create("user-a", "test");
    const s2 = sessions.create("user-b", "test");
    const now = Date.now();
    const fresh = new Date(now - 1_000).toISOString();
    const old = new Date(now - 100 * 86_400_000).toISOString();
    // user-a: three runs inside the 7-day window (2 successes, 1 error) and one outside it.
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "success", createdAt: fresh });
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "success", createdAt: fresh });
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "error", createdAt: fresh });
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "success", createdAt: old });
    // user-b's run must not show up in user-a's usage.
    audit.create({ webhookId: "other-hook", userId: "user-b", sessionId: s2.id, status: "success", createdAt: fresh });
    const since = new Date(now - 7 * 86_400_000).toISOString();
    const usage = audit.usageForUserSince(since, "user-a");
    expect(usage["dns-flush"]?.runs).toBe(3);
    expect(usage["dns-flush"]?.successes).toBe(2);
    expect(usage["dns-flush"]?.successRate).toBeCloseTo(2 / 3);
    expect(usage["other-hook"]).toBeUndefined();
  });
  it("retention purge deletes old rows but keeps recent ones", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const s = sessions.create("user-a", "test");
    const now = Date.now();
    const fresh = new Date(now - 60_000).toISOString();
    const stale = new Date(now - 100 * 86_400_000).toISOString();
    audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: fresh });
    audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: stale });
    audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: stale });
    // Only the two 100-day-old rows are older than the 30-day retention. With
    // maxPerUser: 0 the fresh row survives, so 0 presumably disables the cap (TODO confirm).
    const report = runWebhookAuditPurge(db, { retentionDays: 30, maxPerUser: 0 });
    expect(report.deletedByAge).toBe(2);
    const remaining = db.prepare("SELECT COUNT(*) as n FROM webhook_runs").get() as { n: number };
    expect(remaining.n).toBe(1);
  });
  it("per-user cap keeps the most recent N", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const s = sessions.create("user-a", "test");
    const now = Date.now();
    // Eight rows, one second apart, newest first.
    for (let i = 0; i < 8; i++) {
      const ts = new Date(now - i * 1000).toISOString();
      audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: ts });
    }
    // With retentionDays: 0 all deletions are attributed to the cap, so 0
    // presumably disables age-based purging (TODO confirm).
    const report = runWebhookAuditPurge(db, { retentionDays: 0, maxPerUser: 3 });
    expect(report.deletedByCap).toBe(5);
    const remaining = db.prepare("SELECT COUNT(*) as n FROM webhook_runs").get() as { n: number };
    expect(remaining.n).toBe(3);
  });
  it("listForSession enforces user_id", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const sa = sessions.create("user-a", "test");
    const sb = sessions.create("user-b", "test");
    audit.create({ webhookId: "w", userId: "user-a", sessionId: sa.id, status: "success" });
    audit.create({ webhookId: "w", userId: "user-b", sessionId: sb.id, status: "success" });
    expect(audit.listForSession("user-a", sa.id)).toHaveLength(1);
    expect(audit.listForSession("user-b", sb.id)).toHaveLength(1);
    expect(audit.listForSession("user-a", sa.id)[0]?.user_id).toBe("user-a");
  });
 });
--- a/apps/api/test/rag-client.test.ts
+++ b/apps/api/test/rag-client.test.ts
@@ -0,0 +1,118 @@
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
 import { createServer, type Server } from "node:http";
 let server: Server;
 let port = 0;
 // Starts a throwaway HTTP stub of the remote RAG service on an ephemeral
 // loopback port. It answers POST /search and GET /docs/remote:1 with canned
 // JSON and everything else with 404.
 beforeEach(async () => {
  server = createServer((req, res) => {
    const replyJson = (payload: unknown) => {
      res.writeHead(200, { "content-type": "application/json" });
      res.end(JSON.stringify(payload));
    };
    const requestUrl = new URL(req.url ?? "/", `http://${req.headers.host}`);
    // Decode the path so ids with reserved characters (e.g. "runbooks:vpn")
    // match whether the client encoded the colon as %3A or not.
    const decodedPath = decodeURIComponent(requestUrl.pathname);
    const isSearch = req.method === "POST" && decodedPath === "/search";
    const isDocFetch = req.method === "GET" && decodedPath === "/docs/remote:1";
    if (isSearch) {
      // The request body is drained but not inspected; the reply is sent once it ends.
      req.on("data", () => undefined);
      req.on("end", () => {
        replyJson({
          items: [
            { id: "remote:1", title: "Remote doc", source: "remote", tags: ["remote"], relevance: 0.9, excerpt: "x" },
          ],
        });
      });
      return;
    }
    if (isDocFetch) {
      replyJson({
        id: "remote:1",
        title: "Remote doc",
        source: "remote",
        tags: ["remote"],
        headings: ["Section"],
        content: "Full remote content",
      });
      return;
    }
    res.writeHead(404).end();
  });
  await new Promise<void>((listening) => {
    server.listen(0, "127.0.0.1", () => {
      const bound = server.address();
      port = typeof bound === "object" && bound ? bound.port : 0;
      listening();
    });
  });
 });
 // Waits for the stub server's close callback before the next test starts.
 afterEach(async () => {
  await new Promise<void>((closed) => {
    server.close(() => closed());
  });
 });
 // Exercises the RAG client against the local stub server started in beforeEach.
 describe("rag client", () => {
  // Chunking and ranking settings shared by every case; rebuilt per call so no
  // array instance is shared between tests.
  const sharedSettings = () => ({
    chunkStrategy: "heading" as const,
    chunkSizeChars: 1500,
    topK: 5,
    minRelevance: 0,
    includeTags: [],
    excludeTags: [],
  });
  // Config pointing at the stub server; only the auth token varies.
  const stubConfig = (authToken: string) => ({
    endpoint: `http://127.0.0.1:${port}`,
    authToken,
    timeoutMs: 5000,
    fallbackToLocal: false,
    ...sharedSettings(),
  });
  it("searches via the configured endpoint when set", async () => {
    const { searchViaRag } = await import("../src/rag/client.js");
    const hits = await searchViaRag(stubConfig(""), "anything", 3);
    expect(hits).toHaveLength(1);
    const [firstHit] = hits;
    expect(firstHit?.id).toBe("remote:1");
    expect(firstHit?.relevance).toBe(0.9);
  });
  it("fetches a single doc via the endpoint", async () => {
    const { getViaRag } = await import("../src/rag/client.js");
    const fetched = await getViaRag(stubConfig("secret"), "remote:1");
    expect(fetched?.id).toBe("remote:1");
    expect(fetched?.content).toBe("Full remote content");
  });
  it("isRagRemote returns true when endpoint is set, false otherwise", async () => {
    const { isRagRemote } = await import("../src/rag/client.js");
    const withEndpoint = (endpoint: string) => ({
      authToken: "",
      timeoutMs: 1000,
      fallbackToLocal: true,
      ...sharedSettings(),
      endpoint,
    });
    expect(isRagRemote(withEndpoint(""))).toBe(false);
    expect(isRagRemote(withEndpoint("http://x"))).toBe(true);
  });
 });
--- a/apps/api/test/rate-limit.test.ts
+++ b/apps/api/test/rate-limit.test.ts
@@ -0,0 +1,57 @@
 import { describe, expect, it } from "vitest";
 import { createRateLimiter } from "../src/rate-limit.js";
 // Behavior of the rate limiter. Every case passes an explicit timestamp as
 // the second argument to consume() so the results do not depend on the clock.
 describe("rate-limit", () => {
  it("accepts up to burst then rejects", () => {
    const lim = createRateLimiter({ perMinute: 60, burst: 3 });
    // `remaining` counts down from burst - 1 to 0.
    expect(lim.consume("u1", 0)).toEqual({ ok: true, remaining: 2 });
    expect(lim.consume("u1", 0)).toEqual({ ok: true, remaining: 1 });
    expect(lim.consume("u1", 0)).toEqual({ ok: true, remaining: 0 });
    const denied = lim.consume("u1", 0);
    expect(denied.ok).toBe(false);
    if (!denied.ok) {
      // The denial must advertise a wait of at most 1000 ms.
      expect(denied.retryAfterMs).toBeGreaterThan(0);
      expect(denied.retryAfterMs).toBeLessThanOrEqual(1000);
    }
  });
  it("refills tokens over time", () => {
    const lim = createRateLimiter({ perMinute: 60, burst: 2 });
    expect(lim.consume("u1", 0).ok).toBe(true);
    expect(lim.consume("u1", 0).ok).toBe(true);
    expect(lim.consume("u1", 0).ok).toBe(false);
    // 1 second later, 1 token refilled
    expect(lim.consume("u1", 1000).ok).toBe(true);
    expect(lim.consume("u1", 1000).ok).toBe(false);
  });
  it("isolates buckets per id", () => {
    const lim = createRateLimiter({ perMinute: 60, burst: 1 });
    expect(lim.consume("u1", 0).ok).toBe(true);
    expect(lim.consume("u1", 0).ok).toBe(false);
    // u2 has its own bucket
    expect(lim.consume("u2", 0).ok).toBe(true);
    expect(lim.consume("u2", 0).ok).toBe(false);
  });
  it("caps refill at burst", () => {
    const lim = createRateLimiter({ perMinute: 60, burst: 2 });
    // Wait a long time, tokens should still be capped at 2
    const result = lim.consume("u1", 60_000);
    expect(result).toEqual({ ok: true, remaining: 1 });
    expect(lim.consume("u1", 60_000).ok).toBe(true);
    expect(lim.consume("u1", 60_000).ok).toBe(false);
  });
  it("reset clears a single bucket or all", () => {
    const lim = createRateLimiter({ perMinute: 60, burst: 1 });
    lim.consume("u1", 0);
    lim.consume("u2", 0);
    expect(lim.size()).toBe(2);
    // reset(id) drops that bucket only; the id then starts again with a full burst.
    lim.reset("u1");
    expect(lim.size()).toBe(1);
    expect(lim.consume("u1", 0).ok).toBe(true);
    // reset() without an argument drops every bucket.
    lim.reset();
    expect(lim.size()).toBe(0);
  });
 });
--- a/apps/api/tsconfig.json
+++ b/apps/api/tsconfig.json
@@ -0,0 +1,8 @@
 {
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src"]
 }
--- a/apps/web/index.html
+++ b/apps/web/index.html
@@ -0,0 +1,28 @@
 <!doctype html>
 <html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="color-scheme" content="dark light" />
    <title>SIC — Super Incident Commander</title>
    <!-- Dark favicon by default; the inline script below swaps it for a saved light theme. -->
    <link id="favicon" rel="icon" type="image/png" href="/favicon-dark.png" />
    <script>
      // Apply persisted theme before paint so the favicon/logo match the saved preference.
      // Any stored value other than "light" (including none) falls back to "dark".
      (function () {
        try {
          var stored = window.localStorage.getItem("supr.theme");
          var theme = stored === "light" ? "light" : "dark";
          document.documentElement.dataset.theme = theme;
          var fav = document.getElementById("favicon");
          if (fav) fav.href = theme === "light" ? "/favicon-light.png" : "/favicon-dark.png";
        } catch (error) {
          /* localStorage unavailable, keep defaults */
        }
      })();
    </script>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
 </html>
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -0,0 +1,25 @@
 {
  "name": "@pi-chat/web",
  "private": true,
  "version": "0.1.0",
  "type": "module",
  "scripts": {
    "dev": "vite --host 0.0.0.0",
    "typecheck": "tsc --noEmit",
    "lint": "tsc --noEmit"
  },
  "dependencies": {
    "@pi-chat/shared": "workspace:*",
    "@vitejs/plugin-react": "^4.5.2",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
    "react-markdown": "^10.1.0",
    "remark-gfm": "^4.0.1",
    "vite": "^6.3.5"
  },
  "devDependencies": {
    "@types/react": "^19.1.8",
    "@types/react-dom": "^19.1.6",
    "typescript": "^5.8.3"
  }
 }
--- a/apps/web/public/agent.png
+++ b/apps/web/public/agent.png
--- a/apps/web/public/favicon-dark.png
+++ b/apps/web/public/favicon-dark.png
--- a/apps/web/public/favicon-light.png
+++ b/apps/web/public/favicon-light.png
--- a/apps/web/public/logo-dark.png
+++ b/apps/web/public/logo-dark.png
--- a/apps/web/public/logo-light.png
+++ b/apps/web/public/logo-light.png
--- a/apps/web/src/DocModal.tsx
+++ b/apps/web/src/DocModal.tsx
@@ -0,0 +1,85 @@
 import { useEffect } from "react";
 import { MarkdownView } from "./code-block";
 /** A knowledge-base document as displayed by `DocModal`. */
 export type KnowledgeDoc = {
  id: string;
  /** Shown as the modal heading and used as the dialog's accessible label. */
  title: string;
  /** Shown in small print under the title. */
  source: string;
  tags: string[];
  owner?: string;
  updated?: string;
  headings: string[];
  /** Body text, rendered through `MarkdownView`. */
  content: string;
 };
 /** Props of `DocModal`. */
 type DocModalProps = {
  doc: KnowledgeDoc;
  /** Invoked on Escape, on a backdrop click and on the close button. */
  onClose: () => void;
  /** UI strings supplied by the caller: close-button label and metadata row titles. */
  labels: {
    close: string;
    tags: string;
    owner: string;
    updated: string;
  };
 };
 /**
  * Modal that shows one knowledge document: title and source, optional
  * tags/owner/updated metadata, and the Markdown body. It closes on Escape,
  * on a click on the backdrop, and via the close button. Clicks inside the
  * card do not reach the backdrop.
  */
 const DocModal = ({ doc, onClose, labels }: DocModalProps) => {
  useEffect(() => {
    function closeOnEscape(event: KeyboardEvent) {
      if (event.key === "Escape") onClose();
    }
    window.addEventListener("keydown", closeOnEscape);
    return () => {
      window.removeEventListener("keydown", closeOnEscape);
    };
  }, [onClose]);
  // One term/description pair of the metadata list.
  const metaRow = (term: string, value: string) => (
    <div>
      <dt>{term}</dt>
      <dd>{value}</dd>
    </div>
  );
  const hasTags = doc.tags.length > 0;
  return (
    <div
      aria-label={doc.title}
      className="docModalBackdrop"
      onClick={onClose}
      role="dialog"
    >
      <article className="docModal" onClick={(event) => event.stopPropagation()}>
        <header className="docModalHeader">
          <div>
            <h2>{doc.title}</h2>
            <small>{doc.source}</small>
          </div>
          <button
            aria-label={labels.close}
            className="iconToggle"
            onClick={onClose}
            type="button"
          >
            ✕
          </button>
        </header>
        <dl className="docMeta">
          {hasTags ? metaRow(labels.tags, doc.tags.join(", ")) : null}
          {doc.owner ? metaRow(labels.owner, doc.owner) : null}
          {doc.updated ? metaRow(labels.updated, doc.updated) : null}
        </dl>
        <div className="docModalBody">
          <MarkdownView source={doc.content} />
        </div>
      </article>
    </div>
  );
 };
 export default DocModal;
--- a/apps/web/src/ErrorBoundary.tsx
+++ b/apps/web/src/ErrorBoundary.tsx
@@ -0,0 +1,54 @@
 import { Component, type ReactNode } from "react";
 /** Props of `ErrorBoundary`: the subtree it guards. */
 type ErrorBoundaryProps = {
  children: ReactNode;
 };
 /** State of `ErrorBoundary`: the caught render error, or null while the subtree renders normally. */
 type ErrorBoundaryState = {
  error: Error | null;
 };
 /**
  * Error boundary for the app: when a descendant throws while rendering, the
  * subtree is replaced by a fallback card showing the error message, with a
  * "Try again" action (clears the stored error so the children render again)
  * and a "Reload page" action.
  */
 export class ErrorBoundary extends Component<ErrorBoundaryProps, ErrorBoundaryState> {
  state: ErrorBoundaryState = { error: null };
  static getDerivedStateFromError(error: Error): ErrorBoundaryState {
    return { error };
  }
  componentDidCatch(error: Error, info: { componentStack?: string }): void {
    const { componentStack } = info;
    // Surface to the dev console; production telemetry would hook in here.
    // eslint-disable-next-line no-console
    console.error("[SIC] uncaught render error", error, componentStack);
  }
  private handleReload = () => {
    window.location.reload();
  };
  private handleReset = () => {
    this.setState({ error: null });
  };
  render() {
    const caught = this.state.error;
    if (!caught) {
      return this.props.children;
    }
    return (
      <div className="errorBoundary" role="alert">
        <div className="errorBoundaryCard">
          <strong className="panelHeading">{"Unrecoverable UI error"}</strong>
          <p>Something went wrong while rendering SIC. Your sessions and messages are still saved on the server.</p>
          <pre className="errorBoundaryMessage">{caught.message}</pre>
          <div className="errorBoundaryActions">
            <button onClick={this.handleReset} type="button">
              {"Try again"}
            </button>
            <button className="primaryAction" onClick={this.handleReload} type="button">
              {"Reload page"}
            </button>
          </div>
        </div>
      </div>
    );
  }
 }
--- a/apps/web/src/WebhookFormTab.tsx
+++ b/apps/web/src/WebhookFormTab.tsx
@@ -0,0 +1,231 @@
 import { useEffect, useMemo, useState } from "react";
 import type { FormEvent } from "react";
 import { authorizedHeaders, authTokenFromStorage, jsonHeaders, api } from "./api";
 /** One entry of the `items` array returned by `/api/webhooks`. */
 type PublicWebhook = {
  id: string;
  label: string;
  description?: string;
  method: string;
  required_roles: string[];
  confirmation_required: boolean;
 };
 /**
  * State of the most recent run attempt. `success` carries the
  * `response_status` and `id` fields read from the run endpoint's response.
  */
 type RunResult =
  | { kind: "idle" }
  | { kind: "running" }
  | { kind: "success"; responseStatus: number | null; runId: string }
  | { kind: "error"; message: string };
 /**
  * Props for the webhook form tab.
  * - `webhookId`: id looked up in the `/api/webhooks` listing.
  * - `sessionId`: sent as `sessionId` in the run request body.
  * - `onBack`: invoked by the "Back to chat" buttons.
  */
 type WebhookFormTabProps = {
  webhookId: string;
  sessionId: string;
  onBack: () => void;
 };
 /**
  * English UI strings for the webhook form tab. The component reads every
  * user-visible label from this table.
  */
 const labelsEn = {
  title: "Run webhook",
  description: "Description",
  requiredRoles: "Required roles",
  confirmation: "Requires confirmation",
  method: "Method",
  payload: "Payload (optional JSON)",
  payloadHelp: "These fields are merged with the backend template. Available variables: {user}, {session}, {message}.",
  run: "Run",
  running: "Running...",
  resultOk: "Webhook executed",
  resultErr: "Failed to execute",
  httpStatus: "HTTP",
  runId: "Audit ID",
  back: "Back to chat",
  notFound: "Webhook not found or insufficient permissions",
  loading: "Loading webhook...",
  user: "User",
  session: "Session",
 };
 /**
  * Standalone form for running one webhook from the catalog.
  *
  * Looks the webhook up in the `/api/webhooks` listing, lets the user edit an
  * optional JSON object payload, and POSTs it to `/api/webhooks/:id/run`.
  * Renders an error view when the webhook is missing or the listing fails,
  * and a loading view until the lookup resolves.
  */
 const WebhookFormTabInner = ({ webhookId, sessionId, onBack }: WebhookFormTabProps) => {
  const [labels] = useState(() => labelsEn);
  const [webhook, setWebhook] = useState<PublicWebhook | null>(null);
  const [payload, setPayload] = useState("{}");
  const [result, setResult] = useState<RunResult>({ kind: "idle" });
  const [error, setError] = useState<string | null>(null);
  useEffect(() => {
    // A slow response for a previous webhookId (or one arriving after
    // unmount) must not overwrite the state of the current lookup.
    let cancelled = false;
    setWebhook(null);
    setError(null);
    setResult({ kind: "idle" });
    void (async () => {
      try {
        const data = await api<{ items: PublicWebhook[] }>("/api/webhooks");
        if (cancelled) return;
        const found = data.items.find((item) => item.id === webhookId);
        if (!found) {
          setError(labels.notFound);
          return;
        }
        setWebhook(found);
      } catch (err) {
        console.error(err);
        if (cancelled) return;
        setError(labels.notFound);
      }
    })();
    return () => {
      cancelled = true;
    };
  }, [webhookId, labels.notFound]);
  const submit = async (event: FormEvent) => {
    event.preventDefault();
    if (!webhook) return;
    if (webhook.confirmation_required) {
      const ok = window.confirm(`Run ${webhook.label}?`);
      if (!ok) return;
    }
    let parsed: Record<string, unknown> = {};
    if (payload.trim().length > 0) {
      let value: unknown;
      try {
        value = JSON.parse(payload);
      } catch {
        setResult({ kind: "error", message: "Payload is not valid JSON" });
        return;
      }
      // Valid JSON that is not an object (array, number, string, null) used to
      // be dropped silently and an empty payload sent instead; reject it so
      // the user knows their input was not used.
      if (value === null || typeof value !== "object" || Array.isArray(value)) {
        setResult({ kind: "error", message: "Payload must be a JSON object" });
        return;
      }
      parsed = value as Record<string, unknown>;
    }
    setResult({ kind: "running" });
    try {
      // Encode the id so it always stays a single path segment.
      const response = await fetch(`/api/webhooks/${encodeURIComponent(webhook.id)}/run`, {
        method: "POST",
        headers: jsonHeaders(),
        body: JSON.stringify({
          sessionId,
          confirmed: true,
          lastUserMessage: undefined,
          payload: parsed,
        }),
      });
      if (!response.ok) {
        const detail = await response.text().catch(() => "");
        throw new Error(`http_${response.status}: ${detail.slice(0, 200)}`);
      }
      const body = (await response.json()) as { id: string; response_status: number | null };
      setResult({ kind: "success", responseStatus: body.response_status, runId: body.id });
    } catch (err) {
      console.error(err);
      const message = err instanceof Error ? err.message : "error";
      setResult({ kind: "error", message });
    }
  };
  // Shows only a short prefix of the stored token. With no token the field
  // shows a dash rather than the unrelated "webhook not found" message.
  const tokenInfo = useMemo(() => {
    const t = authTokenFromStorage();
    return t ? `${t.slice(0, 12)}…` : "—";
  }, []);
  if (error) {
    return (
      <main className="formTab error">
        <h1>{labels.title}</h1>
        <p className="muted">{error}</p>
        <button type="button" onClick={onBack}>{labels.back}</button>
      </main>
    );
  }
  if (!webhook) {
    return (
      <main className="formTab loading">
        <h1>{labels.title}</h1>
        <p className="muted">{labels.loading}</p>
      </main>
    );
  }
  return (
    <main className="formTab">
      <header className="formTabHeader">
        <div>
          <small>SIC</small>
          <h1>{webhook.label}</h1>
        </div>
        <button type="button" onClick={onBack}>{labels.back}</button>
      </header>
      <dl className="formTabMeta">
        {webhook.description ? (
          <div>
            <dt>{labels.description}</dt>
            <dd>{webhook.description}</dd>
          </div>
        ) : null}
        <div>
          <dt>{labels.method}</dt>
          <dd><code>{webhook.method}</code></dd>
        </div>
        <div>
          <dt>{labels.requiredRoles}</dt>
          <dd>{webhook.required_roles.join(", ")}</dd>
        </div>
        <div>
          <dt>{labels.confirmation}</dt>
          <dd>{webhook.confirmation_required ? "Yes" : "No"}</dd>
        </div>
        <div>
          <dt>{labels.session}</dt>
          <dd><code>{sessionId.slice(0, 8)}…</code></dd>
        </div>
        <div>
          <dt>{labels.user}</dt>
          <dd><code>{tokenInfo}</code></dd>
        </div>
      </dl>
      <form onSubmit={submit} className="formTabForm">
        <label>
          <span>{labels.payload}</span>
          <textarea
            onChange={(e) => setPayload(e.target.value)}
            rows={8}
            spellCheck={false}
            value={payload}
          />
        </label>
        <small className="muted">{labels.payloadHelp}</small>
        <button
          className="formTabRun"
          disabled={result.kind === "running"}
          type="submit"
        >
          {result.kind === "running" ? labels.running : labels.run}
        </button>
      </form>
      {result.kind === "success" ? (
        <section className="formTabResult success">
          <strong>{labels.resultOk}</strong>
          <small>{labels.httpStatus}: {result.responseStatus ?? "—"}</small>
          <small>{labels.runId}: <code>{result.runId}</code></small>
        </section>
      ) : null}
      {result.kind === "error" ? (
        <section className="formTabResult error">
          <strong>{labels.resultErr}</strong>
          <small>{result.message}</small>
        </section>
      ) : null}
    </main>
  );
 };
 // Reads the `webhook` and `session` query parameters for the main App.
 // Returns null unless both are present and non-empty.
 export const getWebhookFormTabParams = () => {
  const query = new URLSearchParams(window.location.search);
  const webhookId = query.get("webhook");
  const sessionId = query.get("session");
  return webhookId && sessionId ? { webhookId, sessionId } : null;
 };
 const WebhookFormTab = WebhookFormTabInner;
 export default WebhookFormTab;
 export type { WebhookFormTabProps };
--- a/apps/web/src/api.ts
+++ b/apps/web/src/api.ts
@@ -0,0 +1,93 @@
 export const AUTH_TOKEN_STORAGE_KEY = "pi-chat.authToken";
 /**
  * Resolves the auth token: the trimmed value stored in localStorage wins,
  * then the trimmed `VITE_AUTH_TOKEN` build-time value, otherwise null.
  * Empty or whitespace-only values count as absent.
  */
 export const authTokenFromStorage = () => {
  const fromStorage = window.localStorage.getItem(AUTH_TOKEN_STORAGE_KEY)?.trim();
  if (fromStorage) return fromStorage;
  const fromEnv = import.meta.env.VITE_AUTH_TOKEN?.trim();
  return fromEnv || null;
 };
 // Returns a copy of `headers` with the stored token attached as the
 // Authorization header, unless the caller already supplied one. It never
 // sets content-type: Fastify answers 400 when a request declares
 // content-type: application/json but carries an empty body (as DELETE/PATCH
 // calls often do), so body-less callers must not get that header.
 export const authorizedHeaders = (headers?: HeadersInit) => {
  const merged = new Headers(headers);
  const token = authTokenFromStorage();
  if (!token || merged.has("authorization")) return merged;
  // Accept tokens stored with or without the "Bearer " scheme prefix.
  const alreadyPrefixed = token.toLowerCase().startsWith("bearer ");
  merged.set("authorization", alreadyPrefixed ? token : `Bearer ${token}`);
  return merged;
 };
 // Authorized headers for a request with a JSON body: adds
 // content-type: application/json unless the caller already set one.
 export const jsonHeaders = (headers?: HeadersInit) => {
  const merged = authorizedHeaders(headers);
  if (merged.has("content-type")) return merged;
  merged.set("content-type", "application/json");
  return merged;
 };
 /**
  * Authenticated fetch wrapper that parses the JSON response.
  *
  * @param path Request URL.
  * @param init Standard fetch options; caller-supplied headers are preserved.
  * @returns The parsed JSON body, or `undefined` for 204 / empty responses.
  * @throws Error with message `api_error:<status>` on a non-2xx response.
  */
 export const api = async <T,>(path: string, init?: RequestInit): Promise<T> => {
  // Only set content-type when there's actually a body. DELETE / PATCH /
  // GET through this helper without an explicit body must NOT trigger the
  // "Body cannot be empty when content-type is set to 'application/json'"
  // 400 in Fastify. This makes `api()` safe for any verb.
  const body = init?.body;
  const hasBody = body !== undefined && body !== null;
  // FormData, URLSearchParams and typed Blob bodies get their content type
  // from fetch itself (including the multipart boundary). Forcing
  // application/json onto them would make the server misparse the request.
  const declaresOwnType =
    body instanceof FormData ||
    body instanceof URLSearchParams ||
    (body instanceof Blob && body.type !== "");
  const headers =
    hasBody && !declaresOwnType ? jsonHeaders(init?.headers) : authorizedHeaders(init?.headers);
  const response = await fetch(path, { ...init, headers });
  if (!response.ok) {
    throw new Error(`api_error:${response.status}`);
  }
  // 204 No Content has no body to parse; don't blow up trying.
  if (response.status === 204) return undefined as T;
  // Some servers return 200 with empty body; guard the JSON parse too.
  const text = await response.text();
  if (text.length === 0) return undefined as T;
  return JSON.parse(text) as T;
 };
 /**
  * Parses a message's serialized metadata.
  *
  * @param metadata JSON string, or null when the message has none.
  * @returns The parsed object, or null when the input is empty, is not valid
  *   JSON, or is valid JSON that is not a plain object (number, string,
  *   array, `null`). Field types inside the object are not validated.
  */
 export const parseMetadata = (metadata: string | null) => {
  if (!metadata) return null;
  let parsed: unknown;
  try {
    parsed = JSON.parse(metadata);
  } catch {
    return null;
  }
  // JSON.parse happily returns primitives and arrays; those are not metadata.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as {
    docs?: unknown[];
    actions?: Array<{ id: string }>;
    model?: string;
    usage?: {
      promptTokens?: number;
      completionTokens?: number;
      totalTokens?: number;
      cachedTokens?: number;
      durationMs?: number;
    };
  };
 };
 /**
  * Formats a duration in milliseconds for display.
  *
  * - under 1 s: whole milliseconds, e.g. `"250 ms"`
  * - under 1 min: seconds with one decimal, e.g. `"1.5 s"`
  * - otherwise: `"<minutes>m <seconds>s"` with seconds truncated
  *
  * Non-finite or negative input is rendered as `"0 ms"`.
  */
 export const formatDuration = (ms: number): string => {
  if (!Number.isFinite(ms) || ms <= 0) return "0 ms";
  // Timer deltas can be fractional; show whole milliseconds.
  const total = Math.round(ms);
  if (total < 1000) return `${total} ms`;
  if (total < 60_000) {
    const seconds = (total / 1000).toFixed(1);
    // Values just below a minute round up to "60.0"; show those as 1m 0s.
    if (seconds !== "60.0") return `${seconds} s`;
  }
  const wholeSeconds = Math.max(60, Math.floor(total / 1000));
  const minutes = Math.floor(wholeSeconds / 60);
  return `${minutes}m ${wholeSeconds % 60}s`;
 };
 /**
  * Compact number formatting: values below 1000 are printed as-is, values
  * below ten thousand as `"1.5k"`, larger values as whole thousands (`"12k"`).
  */
 export const formatNumber = (n: number): string => {
  if (!(n >= 1000)) return String(n);
  const thousands = n / 1000;
  const oneDecimal = thousands.toFixed(1);
  // Values just below 10 000 round up to "10.0"; print them as "10k" so the
  // output matches what 10 000 itself produces.
  if (n < 10_000 && oneDecimal !== "10.0") return `${oneDecimal}k`;
  return `${thousands.toFixed(0)}k`;
 };
 /**
  * Generates a client-side id: a UUID from `window.crypto.randomUUID` when
  * that API exists, otherwise a `tmp-<timestamp>-<random>` string.
  */
 export const temporaryId = () => {
  const cryptoApi = window.crypto;
  const makeUuid = cryptoApi?.randomUUID;
  if (makeUuid) return makeUuid.call(cryptoApi);
  const randomPart = Math.random().toString(36).slice(2);
  return `tmp-${Date.now()}-${randomPart}`;
 };
 /**
  * Formats a score with two decimals. Anything that is not a finite number
  * (missing value, string, NaN, Infinity) renders as the "s/d" placeholder.
  */
 export const formatScore = (value: unknown) =>
  typeof value === "number" && Number.isFinite(value) ? value.toFixed(2) : "s/d";
--- a/apps/web/src/code-block.tsx
+++ b/apps/web/src/code-block.tsx
@@ -0,0 +1,267 @@
 import { useEffect, useRef, useState } from "react";
 import type { ReactNode } from "react";
 import Markdown from "react-markdown";
 import remarkGfm from "remark-gfm";
 // Icon shown on the copy buttons while idle. Marked aria-hidden; the
 // buttons carry their own aria-label.
 const copyIcon = (
  <svg
    aria-hidden="true"
    viewBox="0 0 24 24"
    width="14"
    height="14"
    fill="none"
    stroke="currentColor"
    strokeWidth="2"
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <rect x="9" y="9" width="11" height="11" rx="2" />
    <path d="M5 15V5a2 2 0 0 1 2-2h10" />
  </svg>
 );
 // Icon shown on the table "Download as CSV" button.
 const downloadIcon = (
  <svg
    aria-hidden="true"
    viewBox="0 0 24 24"
    width="14"
    height="14"
    fill="none"
    stroke="currentColor"
    strokeWidth="2"
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <path d="M12 4v12" />
    <path d="M6 12l6 6 6-6" />
    <path d="M4 20h16" />
  </svg>
 );
 // Check mark that replaces the copy icon while the "copied" state is active.
 const checkIcon = (
  <svg
    aria-hidden="true"
    viewBox="0 0 24 24"
    width="14"
    height="14"
    fill="none"
    stroke="currentColor"
    strokeWidth="2.4"
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    <path d="M5 12.5l4.5 4.5L19 7" />
  </svg>
 );
 /**
  * Copies `text` to the clipboard.
  *
  * Tries the async Clipboard API first. If that throws (for example in a
  * non-secure context), it falls back to selecting a hidden textarea and
  * calling `document.execCommand("copy")`.
  *
  * @returns true when the copy succeeded; false for empty text or when the
  *   fallback command failed.
  */
 const writeToClipboard = async (text: string): Promise<boolean> => {
  if (text.length === 0) return false;
  try {
    await navigator.clipboard.writeText(text);
    return true;
  } catch {
    // Clipboard API unavailable or rejected: use the legacy path below.
  }
  const scratch = document.createElement("textarea");
  scratch.value = text;
  scratch.style.position = "fixed";
  scratch.style.opacity = "0";
  document.body.appendChild(scratch);
  scratch.select();
  try {
    return document.execCommand("copy");
  } catch {
    return false;
  } finally {
    // Always detach the helper element, whatever the copy outcome.
    document.body.removeChild(scratch);
  }
 };
 /**
  * Wraps a rendered `<pre>` with a copy-to-clipboard button. After a
  * successful copy the button shows a check mark for 1.5 s.
  */
 const CodeBlock = ({ children }: { children?: ReactNode }) => {
  const [copied, setCopied] = useState(false);
  const codeRef = useRef<HTMLElement | null>(null);
  const timerRef = useRef<number | null>(null);
  useEffect(() => {
    // Cancel a pending "copied" reset if the block unmounts first.
    return () => {
      if (timerRef.current !== null) {
        window.clearTimeout(timerRef.current);
      }
    };
  }, []);
  const handleCopy = async () => {
    const text = codeRef.current?.innerText ?? "";
    const ok = await writeToClipboard(text);
    // Only report "Copied" when the text actually reached the clipboard.
    if (!ok) return;
    setCopied(true);
    if (timerRef.current !== null) {
      window.clearTimeout(timerRef.current);
    }
    timerRef.current = window.setTimeout(() => setCopied(false), 1500);
  };
  // The ref lands on the <pre>; the text to copy lives in its nested <code>.
  const captureCodeRef = (element: HTMLElement | null) => {
    codeRef.current = element?.querySelector("code") ?? null;
  };
  return (
    <div className="codeBlock">
      <button
        aria-label={copied ? "Copied" : "Copy code"}
        className={`codeCopy${copied ? " copied" : ""}`}
        onClick={handleCopy}
        title={copied ? "Copied" : "Copy code"}
        type="button"
      >
        {copied ? checkIcon : copyIcon}
      </button>
      <pre ref={captureCodeRef}>{children}</pre>
    </div>
  );
 };
 /** Text of one table cell plus whether it came from a `<th>`. */
 type TableCell = { text: string; isHeader: boolean };
 /**
  * Extracts a rendered table into plain strings.
  *
  * Cell text has runs of whitespace collapsed to one space and is trimmed.
  * When every cell of the first row is a `<th>`, that row becomes `headers`;
  * otherwise headers are synthesized as "Column 1", "Column 2", … and every
  * row is treated as data.
  */
 const readTable = (table: HTMLTableElement): { headers: string[]; rows: string[][] } => {
  const matrix: TableCell[][] = [];
  for (const row of Array.from(table.querySelectorAll("tr"))) {
    const cells: TableCell[] = [];
    for (const cell of Array.from(row.querySelectorAll("th,td"))) {
      cells.push({
        text: (cell.textContent ?? "").replace(/\s+/g, " ").trim(),
        isHeader: cell.tagName.toLowerCase() === "th",
      });
    }
    matrix.push(cells);
  }
  const [firstRow] = matrix;
  if (firstRow === undefined) return { headers: [], rows: [] };
  const textsOf = (row: TableCell[]) => row.map((cell) => cell.text);
  const hasHeader = firstRow.length > 0 && firstRow.every((cell) => cell.isHeader);
  if (hasHeader) {
    return { headers: textsOf(firstRow), rows: matrix.slice(1).map(textsOf) };
  }
  return {
    headers: firstRow.map((_, index) => `Column ${index + 1}`),
    rows: matrix.map(textsOf),
  };
 };
 /**
  * Serializes a table as tab-separated values, one row per line. Tabs, line
  * feeds and carriage returns inside a cell are each replaced by a space so
  * a cell can never break the column or row structure.
  */
 const toTsv = (headers: string[], rows: string[][]): string => {
  const escape = (value: string) => value.replace(/[\t\n\r]/g, " ");
  return [headers, ...rows].map((row) => row.map(escape).join("\t")).join("\n");
 };
 /**
  * Serializes a table as CSV following RFC 4180: fields containing a comma,
  * double quote, CR or LF are wrapped in double quotes with inner quotes
  * doubled; records are joined with CRLF.
  */
 const toCsv = (headers: string[], rows: string[][]): string => {
  const quoteField = (field: string) =>
    /[",\n\r]/.test(field) ? `"${field.split('"').join('""')}"` : field;
  const records: string[] = [];
  for (const record of [headers, ...rows]) {
    records.push(record.map(quoteField).join(","));
  }
  return records.join("\r\n");
 };
 /**
  * Triggers a browser download of `content` as `filename` by clicking a
  * temporary anchor that points at a Blob object URL.
  */
 const downloadFile = (filename: string, content: string, mime: string) => {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: mime }));
  const anchor = Object.assign(document.createElement("a"), {
    href: objectUrl,
    download: filename,
    rel: "noreferrer",
  });
  document.body.appendChild(anchor);
  anchor.click();
  document.body.removeChild(anchor);
  // Revoke on the next tick so the download starts cleanly.
  window.setTimeout(() => URL.revokeObjectURL(objectUrl), 0);
 };
 /**
  * Wraps a rendered `<table>` with two actions: download as CSV and copy as
  * TSV. After a successful copy the copy button shows a check mark for 1.5 s.
  */
 const TableBlock = ({ children }: { children?: ReactNode }) => {
  const [copied, setCopied] = useState(false);
  const tableRef = useRef<HTMLTableElement | null>(null);
  const timerRef = useRef<number | null>(null);
  useEffect(() => {
    // Cancel a pending "copied" reset if the block unmounts first.
    return () => {
      if (timerRef.current !== null) {
        window.clearTimeout(timerRef.current);
      }
    };
  }, []);
  const flashCopied = () => {
    setCopied(true);
    if (timerRef.current !== null) {
      window.clearTimeout(timerRef.current);
    }
    timerRef.current = window.setTimeout(() => setCopied(false), 1500);
  };
  const handleCopy = async () => {
    if (!tableRef.current) return;
    const { headers, rows } = readTable(tableRef.current);
    const ok = await writeToClipboard(toTsv(headers, rows));
    // Only report "Copied" when the text actually reached the clipboard.
    if (ok) flashCopied();
  };
  const handleDownloadCsv = () => {
    if (!tableRef.current) return;
    const { headers, rows } = readTable(tableRef.current);
    const filename = `table-${new Date().toISOString().replace(/[:.]/g, "-")}.csv`;
    // Prefix with UTF-8 BOM so Excel opens it correctly.
    downloadFile(filename, `\uFEFF${toCsv(headers, rows)}`, "text/csv;charset=utf-8");
  };
  return (
    <div className="tableBlock">
      <div className="tableBlockActions">
        <button
          aria-label="Download table as CSV"
          className="tableDownload"
          onClick={handleDownloadCsv}
          title="Download as CSV"
          type="button"
        >
          {downloadIcon}
        </button>
        <button
          aria-label={copied ? "Copied" : "Copy table as TSV"}
          className={`tableCopy${copied ? " copied" : ""}`}
          onClick={handleCopy}
          title={copied ? "Copied" : "Copy as TSV"}
          type="button"
        >
          {copied ? checkIcon : copyIcon}
        </button>
      </div>
      <div className="tableScroll">
        <table ref={tableRef}>{children}</table>
      </div>
    </div>
  );
 };
 // Fenced blocks (``` or ~~~, closed or still open at end of input) and
 // single-backtick inline code. The capturing group makes `split` keep them.
 const MARKDOWN_CODE_SPAN = /(```[\s\S]*?(?:```|$)|~~~[\s\S]*?(?:~~~|$)|`[^`\n]+`)/g;
 /**
  * Turns literal `\n` / `\t` escape sequences (two characters) into real
  * newlines and tabs.
  *
  * Text without any real newline is converted everywhere. Once the text has
  * real newlines, code fences and inline code are left untouched so snippets
  * such as `printf "a\nb"` keep their escapes.
  */
 export const normalizeMarkdown = (value: string) => {
  const unescape = (text: string) => text.replace(/\\n/g, "\n").replace(/\\t/g, "\t");
  if (!value.includes("\n")) return unescape(value);
  return value
    .split(MARKDOWN_CODE_SPAN)
    // With one capturing group, odd indices are the matched code spans.
    .map((segment, index) => (index % 2 === 1 ? segment : unescape(segment)))
    .join("");
 };
 /**
  * Element overrides passed to the Markdown renderer: links open in a new
  * tab, `pre` is wrapped in CodeBlock and `table` in TableBlock.
  */
 export const markdownComponents = {
  a: ({ href, children }: { href?: string; children?: ReactNode }) => {
    return (
      <a href={href} rel="noreferrer" target="_blank">
        {children}
      </a>
    );
  },
  pre: ({ children }: { children?: ReactNode }) => {
    return <CodeBlock>{children}</CodeBlock>;
  },
  table: ({ children }: { children?: ReactNode }) => {
    return <TableBlock>{children}</TableBlock>;
  },
 };
 /**
  * Renders `source` as GitHub-flavored Markdown with the shared component
  * overrides. Raw HTML in the source is skipped.
  */
 export const MarkdownView = ({ source }: { source: string }) => {
  const normalized = normalizeMarkdown(source);
  return (
    <Markdown components={markdownComponents} remarkPlugins={[remarkGfm]} skipHtml>
      {normalized}
    </Markdown>
  );
 };
 export { CodeBlock };
--- a/apps/web/src/main.tsx
+++ b/apps/web/src/main.tsx
--- a/apps/web/src/styles.css
+++ b/apps/web/src/styles.css
--- a/apps/web/src/vite-env.d.ts
+++ b/apps/web/src/vite-env.d.ts
@@ -0,0 +1 @@
 /// <reference types="vite/client" />
--- a/apps/web/tsconfig.json
+++ b/apps/web/tsconfig.json
@@ -0,0 +1,8 @@
 {
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "jsx": "react-jsx",
    "outDir": "dist"
  },
  "include": ["src", "vite.config.ts"]
 }
--- a/apps/web/vite.config.ts
+++ b/apps/web/vite.config.ts
@@ -0,0 +1,14 @@
 import { defineConfig } from "vite";
 import react from "@vitejs/plugin-react";
 // Backend origin for the dev-server proxy. Defaults to the local API port;
 // set API_PROXY_TARGET when the API is not reachable on localhost (for
 // example when web and api run in separate containers).
 const apiProxyTarget = process.env.API_PROXY_TARGET?.trim() || "http://localhost:8787";
 // `Number("")` is 0 and `Number("abc")` is NaN, so fall back to 3000 for an
 // unset, empty or non-numeric WEB_PORT.
 const webPort = Number(process.env.WEB_PORT) || 3000;
 export default defineConfig({
  plugins: [react()],
  server: {
    port: webPort,
    proxy: {
      "/api": apiProxyTarget,
      "/healthz": apiProxyTarget,
      "/readyz": apiProxyTarget,
    },
  },
 });
--- a/config/docs.yml
+++ b/config/docs.yml
@@ -0,0 +1,5 @@
 sources:
  - id: knowledge
    path: ./knowledge
    include:
      - "**/*.md"
--- a/config/mcp.yml
+++ b/config/mcp.yml
@@ -0,0 +1,75 @@
 # MCP (Model Context Protocol) tool catalog.
 #
 # Phase 1: this is a DECLARATIVE catalog of available tools. The SIC backend
 # does NOT call any MCP server directly. The LLM sees these tools in its
 # context (so it can reason about what's possible) and the right panel
 # renders them as recommendations that the user can inspect. Execution is
 # reserved for the future MCP runtime, which will run a per-request MCP
 # client; for now every tool here is "inspect only".
 #
 # Each tool:
 #   id            stable identifier
 #   name          human-readable label
 #   description   what the tool does (sent to the LLM verbatim)
 #   server        optional reference to a configured MCP server (Phase 2+)
 #   parameters    JSON Schema for the tool arguments
 #   tags          free-form tags for filtering
 #   enabled       whether the tool is exposed at all
 mcp_servers: []
 mcp_tools:
  - id: kb.search
    name: Search knowledge base
    description: |
      Search the internal knowledge base (runbooks, SOPs, architecture docs) for
      documents relevant to the query. Returns matching document ids and
      relevance scores. Does NOT execute anything.
    server: null
    parameters:
      type: object
      required: [query]
      properties:
        query:
          type: string
          description: Natural-language search query.
        limit:
          type: integer
          minimum: 1
          maximum: 20
          description: Maximum results to return. Defaults to 5.
    tags: [search, knowledge, docs]
    enabled: true
  - id: sessions.list
    name: List recent chat sessions
    description: |
      List the calling user's most recent chat sessions, ordered by updated_at
      descending. Does NOT execute anything; returns metadata only.
    server: null
    parameters:
      type: object
      required: []
      properties:
        limit:
          type: integer
          minimum: 1
          maximum: 50
          description: Maximum sessions to return. Defaults to 10.
    tags: [sessions, listing]
    enabled: true
  - id: webhooks.usage
    name: Webhook usage stats
    description: |
      Return per-webhook usage stats for the calling user over a recent time
      window (defaults to 7 days). Read-only.
    server: null
    parameters:
      type: object
      required: []
      properties:
        days:
          type: integer
          minimum: 1
          maximum: 90
          description: Window in days. Defaults to 7.
    tags: [webhooks, audit, read-only]
    enabled: true
--- a/config/models.yml
+++ b/config/models.yml
@@ -0,0 +1,29 @@
 models:
  - id: fast
    label: MiniMax Fast
    provider: openai-compatible
    base_url: ${LLM_BASE_URL}
    model: MiniMax-M2.7-highspeed
    max_tokens: 1024
  - id: balanced
    label: MiniMax Balanced
    provider: openai-compatible
    base_url: ${LLM_BASE_URL}
    model: MiniMax-M2.7
    max_tokens: 2048
    # If MiniMax fails (5xx, 429, timeout, network) fall through to mr-auto.
    fallback:
      - mr-auto
  - id: reasoning
    label: MiniMax Reasoning
    provider: openai-compatible
    base_url: ${LLM_BASE_URL}
    model: MiniMax-M3
    max_tokens: 4096
  - id: mr-auto
    label: MR Auto (llm.rikrdo.com)
    provider: openai-compatible
    base_url: https://llm.rikrdo.com/v1
    api_key_env: MR_AUTO_API_KEY
    model: mr-auto
    max_tokens: 8192
--- a/config/n8n-workflows.yml
+++ b/config/n8n-workflows.yml
@@ -0,0 +1,35 @@
 # n8n-style external workflow links.
 #
 # These are NOT executed by the SIC backend. Each entry is a deep link into
 # an n8n (or similar) workflow runner that the user can open in a new tab.
 # The LLM can recommend them as "external" suggestions; the right panel
 # renders them with a distinct icon and the action only opens a new tab.
 #
 # Fields:
 #   id            stable identifier (kebab-case recommended)
 #   label         human-readable label
 #   description   what the workflow does
 #   url           absolute external URL (env vars supported)
 #   required_roles user roles allowed to see this link (optional, [] = any)
 #   tags          free-form tags to help retrieval / filtering
 n8n_workflows:
  - id: n8n-vpn-restart
    label: Restart VPN tunnel (n8n)
    description: Opens the n8n workflow that restarts the VPN tunnel after credential rotation.
    url: ${N8N_BASE_URL:?set N8N_BASE_URL}/workflow/vpn-restart
    required_roles:
      - webhook-runner
      - sre
    tags: [vpn, network, automation]
  - id: n8n-incident-summary
    label: Generate incident summary (n8n)
    description: Opens the n8n workflow that drafts an incident summary from the current session.
    url: ${N8N_BASE_URL:?set N8N_BASE_URL}/workflow/incident-summary
    required_roles: []
    tags: [incident, automation, reporting]
  - id: n8n-postmortem
    label: Open postmortem workflow (n8n)
    description: Opens the postmortem workflow in n8n with the current session context.
    url: ${N8N_BASE_URL:?set N8N_BASE_URL}/workflow/postmortem
    required_roles: []
    tags: [postmortem, automation]
--- a/config/rag.yml
+++ b/config/rag.yml
@@ -0,0 +1,42 @@
 # Retrieval-Augmented Generation (RAG) configuration.
 #
 # SIC treats the knowledge base as an external service. The RAG service is
 # expected to expose:
 #   POST {endpoint}/search
 #     body: { query, limit, min_relevance, include_tags, exclude_tags }
 #     returns: { items: [{ id, title, source, tags, relevance, excerpt, content? }] }
 #   GET  {endpoint}/docs/:id
 #     returns: { id, title, source, tags, owner?, updated?, headings, content }
 #
 # For local dev (or when no endpoint is configured) the docs repository
 # falls back to reading Markdown files from `knowledge/` and applying the
 # token-overlap scoring in apps/api/src/docs/repository.ts.
 #
 # Fields:
 #   endpoint           external RAG service base URL (no trailing slash).
 #                      Leave empty to use the local fallback.
 #   auth_token         optional bearer token sent in the Authorization header.
 #   timeout_ms         HTTP request timeout. Default: 10000.
 #   fallback_to_local  when true (default), use the local knowledge/ directory
 #                      if the external endpoint fails. Set to false to fail
 #                      closed.
 #   chunk_strategy     how to split a Markdown doc into chunks (local mode only)
 #     - "heading"  : split on H1/H2/H3, each chunk is a section
 #     - "paragraph": split on blank lines, each chunk is a paragraph block
 #     - "fixed"    : split on a fixed character length (chunk_size_chars)
 #   chunk_size_chars   only used by "fixed" strategy (local mode only)
 #   top_k              max chunks returned per query
 #   min_relevance      chunks with relevance below this are dropped
 #   include_tags       optional global include filter
 #   exclude_tags       optional global exclude filter
 rag:
  endpoint: ${RAG_ENDPOINT_URL:}
  auth_token: ${RAG_AUTH_TOKEN:}
  timeout_ms: 10000
  fallback_to_local: true
  chunk_strategy: heading
  chunk_size_chars: 1500
  top_k: 5
  min_relevance: 0.0
  include_tags: []
  exclude_tags: []
--- a/config/skills.yml
+++ b/config/skills.yml
@@ -0,0 +1,43 @@
 # Skills are prompt fragments injected into the LLM's system prompt.
 # They are persona/behavior customizations, NOT capabilities: the model still
 # only recommends actions and the backend executes them.
 #
 # Fields:
 #   id          stable identifier (kebab-case recommended)
 #   name        human-readable label
 #   description what the skill does (safe to expose via /api/skills)
 #   enabled     whether the fragment is injected (true/false)
 #   prompt      the system prompt fragment to inject
 #
 # All skills are loaded at API boot. Restart the API after editing this file.
 skills:
  - id: sre-on-call
    name: SRE on-call mode
    description: Respond as a senior SRE handling a production page.
    enabled: true
    prompt: |
      You are responding as a senior SRE who is on-call. Be concise and operational.
      Always reference the runbook ids from internal_docs when relevant. Prefer
      concrete actions over abstract advice. When the user's intent is ambiguous,
      ask one short clarifying question instead of guessing.
  - id: blameless-postmortem
    name: Blameless postmortem writer
    description: Help write blameless postmortems using the standard template.
    enabled: true
    prompt: |
      When the user asks for a postmortem or incident review, follow the
      postmortem runbook template exactly. Use blameless language: focus on
      systemic causes and contributing factors, never on individual blame.
      The output must include: Summary, Timeline, Root cause, What went well,
      What went wrong, Corrective actions, Lessons learned.
  - id: security-incident
    name: Security incident response
    description: Guide containment and coordination for security incidents.
    enabled: false
    prompt: |
      When the user describes a security incident, prioritize containment and
      evidence preservation before root-cause analysis. Recommend involving the
      Security IR team and the Communications Lead. Never suggest actions that
      would destroy forensic evidence.
--- a/config/webhooks.yml
+++ b/config/webhooks.yml
@@ -0,0 +1,81 @@
 webhooks:
  - id: vpn-diagnostic
    label: Run VPN diagnostic
    description: Runs a diagnostic on the VPN server and returns latency and health metrics.
    method: POST
    url: ${WEBHOOK_VPN_DIAGNOSTIC_URL}
    required_roles:
      - webhook-runner
    confirmation_required: true
    payload_template:
      source: pi-chat
      user_id: "{{user.id}}"
      session_id: "{{session.id}}"
      last_user_message: "{{chat.last_user_message}}"
  - id: service-restart
    label: Restart service
    description: Restarts a system service. The service name is read from the payload.
    method: POST
    url: ${WEBHOOK_SERVICE_RESTART_URL}
    required_roles:
      - webhook-runner
      - sre
    confirmation_required: true
    payload_template:
      source: pi-chat
      user_id: "{{user.id}}"
      session_id: "{{session.id}}"
      service: "{{payload.service}}"
      last_user_message: "{{chat.last_user_message}}"
  - id: dns-flush
    label: Flush local DNS
    description: Clears the local DNS cache and returns the result.
    method: POST
    url: ${WEBHOOK_DNS_FLUSH_URL}
    required_roles:
      - webhook-runner
    confirmation_required: true
    payload_template:
      source: pi-chat
      user_id: "{{user.id}}"
      session_id: "{{session.id}}"
      last_user_message: "{{chat.last_user_message}}"
  - id: disk-cleanup
    label: Clean /tmp
    description: Removes files in /tmp older than 7 days.
    method: POST
    url: ${WEBHOOK_DISK_CLEANUP_URL}
    required_roles:
      - webhook-runner
      - sre
    confirmation_required: true
    payload_template:
      source: pi-chat
      user_id: "{{user.id}}"
      session_id: "{{session.id}}"
      last_user_message: "{{chat.last_user_message}}"
  - id: log-tail
    label: Tail service log
    description: Returns the last N lines of a service log.
    method: GET
    url: ${WEBHOOK_LOG_TAIL_URL}
    required_roles:
      - webhook-runner
    confirmation_required: false
    payload_template:
      service: "{{payload.service}}"
      lines: "{{payload.lines}}"
  - id: cache-purge
    label: Purge CDN cache
    description: Invalidates the CDN cache for the provided paths.
    method: POST
    url: ${WEBHOOK_CACHE_PURGE_URL}
    required_roles:
      - webhook-runner
    confirmation_required: true
    payload_template:
      source: pi-chat
      user_id: "{{user.id}}"
      session_id: "{{session.id}}"
      paths: "{{payload.paths}}"
      last_user_message: "{{chat.last_user_message}}"
--- a/data/.gitkeep
+++ b/data/.gitkeep
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@@ -0,0 +1,27 @@
 services:
  api:
    build:
      context: ..
      dockerfile: deploy/docker/api.Dockerfile
    ports:
      - "8787:8787"
    volumes:
      - ../data:/app/data
      - ../config:/app/config
      - ../knowledge:/app/knowledge
    environment:
      AUTH_MODE: local
      DATABASE_URL: sqlite:///app/data/pi-chat.db
      API_PORT: 8787
      LLM_BASE_URL: https://api.minimax.io/v1
      LLM_API_KEY: ${MINIMAX_API_KEY:?set MINIMAX_API_KEY}
      DEFAULT_MODEL: fast
  web:
    build:
      context: ..
      dockerfile: deploy/docker/web.Dockerfile
    ports:
      - "3000:3000"
    depends_on:
      - api
--- a/deploy/docker/api.Dockerfile
+++ b/deploy/docker/api.Dockerfile
@@ -0,0 +1,9 @@
 FROM node:22-alpine
 WORKDIR /app
 COPY package.json pnpm-workspace.yaml tsconfig.base.json ./
 COPY apps/api ./apps/api
 COPY packages ./packages
 RUN corepack enable && pnpm install --frozen-lockfile=false
 WORKDIR /app/apps/api
 EXPOSE 8787
 CMD ["pnpm", "dev"]
--- a/deploy/docker/web.Dockerfile
+++ b/deploy/docker/web.Dockerfile
@@ -0,0 +1,9 @@
 # Image for the web app. Build context is the repository root.
 FROM node:22-alpine
 WORKDIR /app
 # Workspace manifests first, then the web app and the shared packages.
 # NOTE(review): no pnpm-lock.yaml is copied, so dependency versions are
 # resolved at build time — confirm this is intended.
 COPY package.json pnpm-workspace.yaml tsconfig.base.json ./
 COPY apps/web ./apps/web
 COPY packages ./packages
 # corepack provides pnpm; --frozen-lockfile=false lets the install proceed
 # without a matching lockfile.
 RUN corepack enable && pnpm install --frozen-lockfile=false
 WORKDIR /app/apps/web
 EXPOSE 3000
 # Runs the app's `dev` script.
 # NOTE(review): assumes that script listens on port 3000 and on a
 # non-loopback interface so the published port is reachable — confirm.
 CMD ["pnpm", "dev"]
--- a/docs/agents/api-agent.md
+++ b/docs/agents/api-agent.md
@@ -0,0 +1,17 @@
 # API Agent
 Owns the Fastify backend.
 ## Focus
 - Design HTTP/SSE contracts first.
 - Persist every critical state in SQLite.
 - Validate ownership with `session_id + user_id`.
 - Emit JSON logs.
 - Keep `/healthz` and `/readyz` simple.
 ## Do not
 - Do not keep sessions in memory.
 - Do not expose real webhook URLs to clients.
 - Do not execute webhooks without explicit confirmation.
--- a/docs/agents/pi-adapter-agent.md
+++ b/docs/agents/pi-adapter-agent.md
@@ -0,0 +1,14 @@
 # PI Adapter Agent
 Owns the isolation layer between the backend and the `pi.dev` / LLM provider runtime.
 ## Focus
 - Expose a stable contract to the backend.
 - Support OpenAI-compatible providers.
 - Return a structured response: `answer`, `recommended_actions`, `internal_docs`.
 ## Do not
 - Do not mix backend HTTP rules with model logic.
 - Do not let the model execute tools directly in Phase 1.
--- a/docs/agents/security-reliability-agent.md
+++ b/docs/agents/security-reliability-agent.md
@@ -0,0 +1,11 @@
 # Security & Reliability Agent
 Owns reviewing isolation, audit, and execution rules.
 ## Checklist
 - Every message query filters by `session_id` AND `user_id`.
 - Every webhook validates roles before being shown and before being executed.
 - Every execution is recorded in `webhook_runs`.
 - The frontend never receives real webhook URLs.
 - No critical state lives only in memory.
--- a/docs/agents/web-agent.md
+++ b/docs/agents/web-agent.md
@@ -0,0 +1,15 @@
 # Web Agent
 Owns the React + Vite UI.
 ## Focus
 - Three-column layout: sessions, chat, right panel.
 - Consume SSE from `/api/chat/stream`.
 - Show recommended actions without auto-executing them.
 - Rebuild state from the API, not from local memory as the source of truth.
 ## Do not
 - Do not call webhooks directly from the browser.
 - Do not store tokens or secrets in the frontend.
--- a/docs/observabilidad-prompt.md
+++ b/docs/observabilidad-prompt.md
@@ -0,0 +1,2 @@
 === PROMPT ===
 === RESPUESTA ===
--- a/docs/product-definition.md
+++ b/docs/product-definition.md
@@ -0,0 +1,11 @@
 # Short definition
 `SIC — Super Incident Commander` is a multi-session web interface for consulting a centralized `pi.dev` engine, with persistent history, simple search over internal documentation, and webhook recommendations that are only executed from the backend after explicit user confirmation.
 ## Target user
 Small team, up to 5 concurrent users.
 ## Successful MVP
 A user opens the UI, creates or resumes a session, asks a question, receives a streamed response, sees related documentation, gets recommended actions, and can execute a confirmed webhook. Everything is persisted and auditable.
--- a/docs/reliable-history.md
+++ b/docs/reliable-history.md
@@ -0,0 +1,57 @@
 # Reliable History
 ## Goal
 Guarantee that the chat history is reconstructible, isolated by user, and consistent even if the backend restarts.
 ## Mandatory rules
 1. Persist the user message before calling the LLM.
 2. Persist the assistant response when the stream finishes.
 3. If the LLM fails, record the failure in metadata or as a controlled error message.
 4. Do not keep critical conversational state in memory.
 5. All session and message queries must filter by `session_id` AND `user_id`.
 6. Webhooks must be audited even when they fail.
 ## Base tables
 ```sql
 CREATE TABLE IF NOT EXISTS chat_sessions (
  id TEXT PRIMARY KEY,
  user_id TEXT NOT NULL,
  title TEXT,
  created_at TEXT NOT NULL,
  updated_at TEXT NOT NULL
 );
 CREATE TABLE IF NOT EXISTS chat_messages (
  id TEXT PRIMARY KEY,
  session_id TEXT NOT NULL,
  user_id TEXT NOT NULL,
  role TEXT NOT NULL,
  content TEXT NOT NULL,
  metadata TEXT,
  created_at TEXT NOT NULL,
  FOREIGN KEY (session_id) REFERENCES chat_sessions(id)
 );
 CREATE TABLE IF NOT EXISTS webhook_runs (
  id TEXT PRIMARY KEY,
  webhook_id TEXT NOT NULL,
  user_id TEXT NOT NULL,
  session_id TEXT NOT NULL,
  status TEXT NOT NULL,
  request_payload TEXT,
  response_status INTEGER,
  created_at TEXT NOT NULL
 );
 ```
 ## Security invariant
 ```sql
 WHERE session_id = ?
 AND user_id = ?
 ```
 Without this filter, the query is incorrectly designed.
--- a/knowledge/runbooks/disk-cleanup.md
+++ b/knowledge/runbooks/disk-cleanup.md
@@ -0,0 +1,25 @@
 ---
 title: Disk Cleanup Runbook
 tags: [disk, cleanup, storage, operations]
 owner: sre
 updated: 2026-06-12
 ---
 # Disk Cleanup Runbook
 ## When to use it
 - `disk usage > 85%` alert on /tmp or /var.
 - Job failures with `No space left on device`.
 - Before scheduled node maintenance.
 ## Procedure
 1. List candidate files: `find /tmp -type f -mtime +7`.
 2. Confirm none are in use by an active process.
 3. Run the `disk-cleanup` webhook to remove /tmp files older than 7 days.
 4. Re-verify disk usage.
 ## Related webhooks
 - disk-cleanup
--- a/knowledge/runbooks/dns-flush.md
+++ b/knowledge/runbooks/dns-flush.md
@@ -0,0 +1,24 @@
 ---
 title: DNS Flush Runbook
 tags: [dns, network, cache, troubleshooting]
 owner: netops
 updated: 2026-06-10
 ---
 # DNS Flush Runbook
 ## Symptoms
 - DNS resolutions return stale IPs.
 - Users report that a site "works on some machines and not on others".
 - Recent DNS changes are not propagating.
 ## Diagnosis
 1. Check the local cache with `ipconfig /displaydns` or `resolvectl statistics`.
 2. Confirm the upstream resolver is responding.
 3. Run the `dns-flush` webhook on the affected machine.
 ## Related webhooks
 - dns-flush
--- a/knowledge/runbooks/incident-response-long.md
+++ b/knowledge/runbooks/incident-response-long.md
@@ -0,0 +1,280 @@
 ---
 title: Production Incident Response Runbook (long-form)
 tags: [incident, production, sre, on-call, runbook, master]
 owner: sre
 updated: 2026-06-28
 ---
 # Production Incident Response Runbook (long-form)
 > This runbook is designed to exercise the UI: it contains nested headings, lists, tables, code blocks, blockquotes, links, and enough volume to force scroll in the modal. Use it as a reference during drills and to validate the look of the documentation viewer.
 ## Table of contents
 1. [Purpose and scope](#purpose-and-scope)
 2. [Severities and SLAs](#severities-and-slas)
 3. [Roles and responsibilities](#roles-and-responsibilities)
 4. [Response flow](#response-flow)
 5. [Initial diagnosis](#initial-diagnosis)
 6. [Common incident patterns](#common-incident-patterns)
 7. [Useful commands](#useful-commands)
 8. [Available webhooks](#available-webhooks)
 9. [Escalation](#escalation)
 10. [Post-mortem](#post-mortem)
 11. [Appendix: glossary](#appendix-glossary)
 ## Purpose and scope
 This runbook defines the standard procedure for responding to production incidents that affect the availability, integrity, or performance of critical services. It applies to every engineering and operations team that maintains services in scope of SIC.
 ### When to use this runbook
 - Partial or total service outages.
 - Severe performance degradation (p99 latency > agreed SLA).
 - Confirmed or suspected data loss or corruption.
 - Security alerts with production impact.
 ### When NOT to use this runbook
 - Failures in dev or staging environments without user impact.
 - Change requests or scheduled maintenance.
 - HR or administrative process incidents.
 ## Severities and SLAs
 | Severity | Definition | Ack SLA | Mitigation SLA | Communication |
 | --- | --- | --- | --- | --- |
 | **SEV-1** | Total outage or data loss | 5 minutes | 60 minutes | Every 15 min |
 | **SEV-2** | Severe degradation, affects > 30% of users | 10 minutes | 2 hours | Every 30 min |
 | **SEV-3** | Partial degradation, affects < 30% of users | 30 minutes | 8 hours | Every 2 hours |
 | **SEV-4** | Cosmetic, no functional impact | 1 business day | Next sprint | Async |
 > **Important**: severity can go up or down as the incident evolves. Document every change in the incident channel with a timestamp.
 ## Roles and responsibilities
 - **Incident Commander (IC)**: coordinates the response, does not run technical tasks. The only person who can declare the incident resolved.
 - **Comms Lead**: handles communication to stakeholders, status page, and customers.
 - **Tech Lead**: leads the technical investigation, assigns tasks to the response team.
 - **Subject Matter Expert (SME)**: provides system-specific knowledge for the affected service.
 - **Scribe**: documents the incident timeline in real time.
 ## Response flow
 1. **Detect**: alert, user report, or proactive monitoring.
 2. **Triage**: classify severity and assign an IC in under 5 minutes.
 3. **Convene**: open a bridge and the #inc-YYYYMMDD-XX channel.
 4. **Mitigate**: apply changes to restore service. The root cause can wait.
 5. **Resolve**: confirm the service is stable. Close the incident.
 6. **Post-mortem**: within 5 business days, blameless.
 ### Flow diagram
 ```mermaid
 graph TD
  A[Detect] --> B{Triage}
  B -->|SEV-1/2| C[Open bridge]
  B -->|SEV-3/4| D[Assign owner]
  C --> E[Investigate]
  D --> E
  E --> F{Mitigation?}
  F -->|Yes| G[Apply fix]
  F -->|No| H[Escalate]
  G --> I[Monitor]
  I --> J{Stable?}
  J -->|Yes| K[Close]
  J -->|No| E
  H --> E
  K --> L[Post-mortem]
 ```
 ## Initial diagnosis
 Before going deeper, run the following steps in order:
 1. Check the overall service health dashboard.
 2. Review the last hour of production changes (`deploy log`).
 3. Check active alerts in the monitoring system.
 4. Confirm the failure is not user-side (DNS, local network).
 ### Triage checklist
 - [ ] Affected service identified
 - [ ] Severity assigned
 - [ ] IC identified
 - [ ] Bridge open
 - [ ] Communication channel created
 - [ ] Status page updated
 - [ ] Comms lead assigned
 ## Common incident patterns
 ### Pattern A: latency spike
 **Symptoms**: p99 latency rises from 200 ms to > 2 s without proportional traffic increase.
 **Typical causes**:
 - DB connection pool saturation.
 - Massive cache miss (accidental invalidation).
 - Long JVM garbage collection.
 **Immediate actions**:
 1. Check DB metrics (connections, locks, slow queries).
 2. Validate cache hit rate.
 3. If no cause is identified in 5 min, escalate to the service SME.
 ### Pattern B: cascading 5xx errors
 **Symptoms**: sudden increase of HTTP 500/502/503 on one or more endpoints.
 **Typical causes**:
 - Upstream service down.
 - Invalid configuration deployed.
 - External resource (third-party API) unavailable.
 **Immediate actions**:
 1. Identify the failing upstream service.
 2. Review the last deploy touching that path.
 3. If the deploy is to blame, consider a rollback.
 ### Pattern C: data loss
 **Symptoms**: customers report missing or inconsistent data.
 **Typical causes**:
 - Cleanup job that deleted more than intended.
 - Schema migration executed with a bug.
 - Bug in business logic.
 **Immediate actions**:
 1. **Stop** any job that could make things worse.
 2. Evaluate whether a recent and viable backup can be restored.
 3. Escalate immediately to the engineering lead.
 ## Useful commands
 ### Check connectivity
 ```bash
 # DNS
 dig +short example.com
 # Basic HTTP
 curl -sSI https://api.example.com/health
 # TCP to a specific port
 nc -zv db.internal 5432
 ```
 ### Inspect logs live
 ```bash
 # Last 100 lines and follow
 kubectl logs -n prod deploy/api --tail=100 -f
 # Logs from the last 5 minutes
 kubectl logs -n prod deploy/api --since=5m
 # Logs of a specific pod
 kubectl logs -n prod api-7d4f8b9c-x2k9n --tail=200
 ```
 ### Quick metrics
 ```bash
 # CPU per pod
 kubectl top pods -n prod
 # Memory per pod
 kubectl top pods -n prod --containers
 # Disk usage of a node
 ssh node-01 df -h
 ```
 ## Available webhooks
 | Webhook | When to use it | Requires confirmation |
 | --- | --- | --- |
 | `vpn-diagnostic` | VPN access issues | Yes |
 | `service-restart` | Hung or zombie service | Yes |
 | `dns-flush` | Broken DNS resolution | No |
 | `disk-cleanup` | Disk > 90% | Yes |
 | `log-tail` | Need logs in real time | No |
 | `cache-purge` | Stale or corrupt cache | Yes |
 > Remember: webhooks only run when a user explicitly triggers them, and the ones marked "Yes" above also ask for confirmation first. The LLM can only recommend webhooks; it must never execute them directly.
 ## Escalation
 If the incident is not mitigated within the agreed SLA:
 1. Notify the area's on-call manager.
 2. If it exceeds 2 hours, notify the engineering director.
 3. If customers are impacted, involve Customer Success.
 4. If there is monetary or data loss, notify Legal and the C-level.
 ### Emergency contacts
 ```text
 SRE on-call:    +54 11 5555-0001
 Platform lead:  +54 11 5555-0002
 Security IR:    +54 11 5555-0003
 CTO:            +54 11 5555-0004
 ```
 ## Post-mortem
 Within 5 business days after closing the incident:
 1. Schedule a meeting with everyone involved.
 2. Share the post-mortem document 24 h in advance.
 3. During the meeting: review the timeline, identify the root cause.
 4. Document an action plan with owners and dates.
 5. Share learnings with the rest of the organization.
 ### Post-mortem template
 ```markdown
 # Post-mortem: <title>
 ## Summary
 <2-3 sentences about what happened and what the impact was>
 ## Timeline
 - HH:MM - <event>
 - HH:MM - <event>
 ## Root cause
 <technical description of the cause>
 ## What went well
 - <item>
 - <item>
 ## What went wrong
 - <item>
 - <item>
 ## Corrective actions
 - [ ] <action> - owner: <person> - due: <date>
 - [ ] <action> - owner: <person> - due: <date>
 ## Lessons learned
 <actionable insights for the team and the organization>
 ```
 ## Appendix: glossary
 - **IC**: Incident Commander.
 - **SME**: Subject Matter Expert.
 - **SLA**: Service Level Agreement.
 - **p99**: 99th percentile of latency.
 - **Blameless**: culture where the post-mortem looks for systemic causes, not blame.
 - **Rollback**: reverting a change to the previous version.
 - **Mitigation**: action to reduce impact, not necessarily the root cause.
 - **Resolution**: confirmation that the system is stable.
 ---
 > If you find outdated or missing information in this runbook, edit the file and notify the SRE team. The source of truth is always the repository, not PDFs attached in Confluence.
--- a/knowledge/runbooks/incident-response.md
+++ b/knowledge/runbooks/incident-response.md
@@ -0,0 +1,36 @@
 ---
 title: Incident Response Framework
 tags: [incident, response, framework, sev, runbook]
 owner: sre
 updated: 2026-06-20
 ---
 # Incident Response Framework
 ## Severities
 - **SEV1**: total outage. Page on-call. Mitigate first, post-mortem after.
 - **SEV2**: significant degradation. Ticket + stakeholder communication.
 - **SEV3**: minor impact. Normal ticket.
 ## Steps
 1. **Detect**: automatic alert or report.
 2. **Triage**: identify scope and severity.
 3. **Mitigate**: apply runbook or workaround before the root-cause fix.
 4. **Communicate**: status page and stakeholders every 30 min for SEV1.
 5. **Resolve**: apply the root-cause fix.
 6. **Post-mortem**: blameless, within 5 business days.
 ## Roles
 - Incident Commander
 - Communications Lead
 - Subject Matter Expert
 ## Related webhooks
 - service-restart
 - dns-flush
 - disk-cleanup
 - log-tail
--- a/knowledge/runbooks/service-restart.md
+++ b/knowledge/runbooks/service-restart.md
@@ -0,0 +1,32 @@
 ---
 title: Service Restart Runbook
 tags: [service, restart, systemd, operations]
 owner: sre
 updated: 2026-06-15
 ---
 # Service Restart Runbook
 ## When to use it
 - The service is down or not responding to health checks.
 - Sustained performance drop that cannot be explained by load.
 - After a deploy that left the service in an inconsistent state.
 ## Diagnosis
 1. Confirm the current state: `systemctl status <service>` or equivalent.
 2. Review the last 200 lines of the log.
 3. Check dependencies (DB, Redis, network).
 4. If there is no clear cause, restart the service with the `service-restart` webhook.
 ## Equivalent command
 ```bash
 systemctl restart <service>
 ```
 ## Related webhooks
 - service-restart
 - log-tail
--- a/knowledge/runbooks/vpn.md
+++ b/knowledge/runbooks/vpn.md
@@ -0,0 +1,22 @@
 ---
 title: VPN Runbook
 tags: [vpn, network, access]
 owner: sre
 updated: 2026-06-01
 ---
 # VPN Runbook
 ## Symptoms
 Users cannot connect to the VPN or lose access intermittently.
 ## Diagnosis
 - Check the VPN service status.
 - Review gateway logs.
 - Confirm user-side connectivity.
 ## Related webhooks
 - vpn-diagnostic
--- a/knowledge/sops/log-tail.md
+++ b/knowledge/sops/log-tail.md
@@ -0,0 +1,23 @@
 ---
 title: Log Reading SOP
 tags: [logs, sops, troubleshooting, observability]
 owner: sre
 updated: 2026-06-05
 ---
 # Log Reading SOP
 ## Goal
 Retrieve the last N lines of a service log in under 30 seconds.
 ## Procedure
 1. Identify the service and the log path.
 2. Call the `log-tail` webhook with `service` and `lines`.
 3. Look for error patterns (ERROR, CRITICAL, stack traces).
 4. If there is a matching runbook, follow it.
 ## Related webhooks
 - log-tail
--- a/package.json
+++ b/package.json
@@ -0,0 +1,21 @@
 {
  "name": "pi-chat-harness",
  "private": true,
  "version": "0.1.0",
  "type": "module",
  "packageManager": "pnpm@9.15.0",
  "scripts": {
    "dev": "pnpm -r --parallel dev",
    "lint": "pnpm -r lint",
    "typecheck": "pnpm -r typecheck",
    "test": "vitest run",
    "test:watch": "vitest",
    "smoke": "node scripts/smoke.mjs",
    "smoke:mock": "node scripts/smoke.mjs --mock-llm",
    "mock:llm": "node scripts/mock-llm.mjs"
  },
  "devDependencies": {
    "typescript": "^5.8.3",
    "vitest": "^4.1.9"
  }
 }
--- a/packages/pi-adapter/package.json
+++ b/packages/pi-adapter/package.json
@@ -0,0 +1,19 @@
 {
  "name": "@pi-chat/pi-adapter",
  "private": true,
  "version": "0.1.0",
  "type": "module",
  "exports": {
    ".": "./src/index.ts"
  },
  "scripts": {
    "typecheck": "tsc --noEmit",
    "lint": "tsc --noEmit"
  },
  "dependencies": {
    "@pi-chat/shared": "workspace:*"
  },
  "devDependencies": {
    "typescript": "^5.8.3"
  }
 }
--- a/packages/pi-adapter/src/index.ts
+++ b/packages/pi-adapter/src/index.ts
@@ -0,0 +1,354 @@
 import type { ChatResult, InternalDocReference, RecommendedAction } from "@pi-chat/shared";
 /**
  * Everything an adapter needs to answer one chat turn.
  */
 export type PiChatInput = {
  // Text of the current user turn; sent as the final `user` message.
  message: string;
  // Model identifier; the OpenAI-compatible adapter falls back to its
  // configured default when this is an empty string.
  model: string;
  // Candidate documents. They are serialized into the prompt context and are
  // the only ids the model's `internal_docs` may refer to.
  docs: InternalDocReference[];
  // Actions the model is allowed to recommend. They are serialized into the
  // prompt context and are the only ids accepted in `recommended_actions`.
  availableActions: RecommendedAction[];
  // Earlier turns, replayed before the current message. The OpenAI-compatible
  // adapter forwards `tool` entries with the `assistant` role.
  history?: Array<{
    role: "user" | "assistant" | "system" | "tool";
    content: string;
  }>;
  // Extra system prompts; each one is sent as its own system message right
  // after the base identity prompt.
  skillPrompts?: string[];
  // Optional per-session system prompt override. The OpenAI-compatible
  // adapter trims it and, when non-empty, sends it as its own system message
  // after the base identity prompt and the skill prompts, right before the
  // docs/actions context message.
  systemPrompt?: string | null;
 };
 /**
  * Contract between the backend and an LLM runtime: one request in, one
  * structured result out.
  */
 export interface PiAdapter {
  /**
   * Answers a single chat turn.
   *
   * @param input - The message plus the docs, actions, history and prompts
   *   that make up the request.
   * @returns A {@link PiChatResult}; `ok: false` carries a structured error
   *   together with a fallback result.
   */
  chat(input: PiChatInput): Promise<PiChatResult>;
 }
 /**
  * Why the model output could not be turned into a structured result.
  *
  * - `json_parse`: no `{ ... }` span was found, or `JSON.parse` failed on it.
  * - `schema`: the parsed value is not an object.
  * - `no_content`: the model message was empty or whitespace only.
  *
  * `rawContent` holds at most the first 500 characters of the model output.
  */
 export type PiChatError =
  | { kind: "json_parse"; reason: string; rawContent: string }
  | { kind: "schema"; reason: string; rawContent: string }
  | { kind: "no_content"; message: string };
 /**
  * Token accounting for one chat call. Every field is optional because
  * providers report different subsets.
  */
 export type PiChatUsage = {
  // From the provider's `usage.prompt_tokens`.
  promptTokens?: number;
  // From the provider's `usage.completion_tokens`.
  completionTokens?: number;
  // From the provider's `usage.total_tokens`.
  totalTokens?: number;
  // From the provider's `usage.prompt_tokens_details.cached_tokens`.
  cachedTokens?: number;
  // Not populated by `extractUsage` in this file; presumably filled in by the
  // caller that times the request — TODO confirm.
  durationMs?: number;
 };
 /**
  * Outcome of {@link PiAdapter.chat}, discriminated on `ok`.
  *
  * - `ok: true`: `result` is the normalized model answer.
  * - `ok: false`: `error` explains the failure and `fallback` is a result the
  *   caller can still use. The OpenAI-compatible adapter fills the fallback
  *   with the raw model text, no recommended actions and the input docs.
  */
 export type PiChatResult =
  | { ok: true; result: ChatResult; usage?: PiChatUsage }
  | { ok: false; error: PiChatError; fallback: ChatResult; usage?: PiChatUsage };
 /**
  * Configuration for {@link createOpenAICompatiblePiAdapter}.
  */
 export type OpenAICompatiblePiAdapterOptions = {
  // Provider base URL; one trailing slash is stripped before
  // `/chat/completions` is appended.
  baseUrl: string;
  // Sent as `Authorization: Bearer <apiKey>`.
  apiKey: string;
  // Used when the per-request `model` is an empty string.
  defaultModel: string;
  // Milliseconds before the request is aborted; defaults to 30_000.
  timeoutMs?: number;
  // Value sent as `max_tokens`; defaults to 4096.
  maxTokens?: number;
 };
 /**
  * The subset of an OpenAI-compatible `/chat/completions` response that this
  * module reads. Everything is optional because the body is untrusted.
  */
 type OpenAIChatResponse = {
  // Only the first choice's `message.content` is used.
  choices?: Array<{
    message?: {
      content?: string;
    };
  }>;
  // Token counters, converted to `PiChatUsage` by `extractUsage`.
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
    prompt_tokens_details?: { cached_tokens?: number };
  };
 };
 /**
  * Coerces an untrusted value into a score within [0, 1].
  *
  * @param value - Candidate score, typically taken from model output.
  * @param fallback - Returned as-is when `value` is not a number or is NaN.
  * @returns `value` clamped to the 0..1 range, or `fallback`.
  */
 const score = (value: unknown, fallback: number) => {
  if (typeof value === "number" && !Number.isNaN(value)) {
    if (value <= 0) return 0;
    if (value >= 1) return 1;
    return value;
  }
  return fallback;
 };
 /**
  * Pulls the answer text out of a parsed model reply, whatever key the model
  * chose for it.
  *
  * Well-known key names are checked first, in priority order; the first one
  * holding a non-blank string wins and is returned untrimmed. Otherwise the
  * longest (after trimming) string value of the object is returned, trimmed.
  *
  * @param parsed - Result of `JSON.parse`; anything that is not an object
  *   yields `null`.
  * @returns The answer text, or `null` when no usable string field exists.
  */
 const extractAnswer = (parsed: unknown): string | null => {
  if (parsed === null || typeof parsed !== "object") return null;
  const record = parsed as Record<string, unknown>;

  const isNonBlankString = (candidate: unknown): candidate is string =>
    typeof candidate === "string" && candidate.trim().length > 0;

  const wellKnownKeys = ["answer", "response", "output", "text", "content", "message", "result"];
  const named = wellKnownKeys.map((key) => record[key]).find(isNonBlankString);
  if (named !== undefined) return named;

  let best: string | null = null;
  for (const candidate of Object.values(record)) {
    if (typeof candidate !== "string") continue;
    // Values sitting under the array-typed keys are handled separately and
    // never count as the answer, even when the model sent a string there.
    if (candidate === record.recommended_actions || candidate === record.internal_docs) continue;
    const text = candidate.trim();
    // Strictly greater, so the first of several equally long values is kept
    // and blank strings never win.
    if (text.length > (best?.length ?? 0)) best = text;
  }
  return best;
 };
 /**
  * Turns parsed but untrusted model output into a {@link ChatResult}.
  *
  * - `answer` comes from `extractAnswer`, falling back to `fallbackAnswer`.
  * - `recommended_actions` keeps only entries whose id exists in
  *   `availableActions`; entries may be objects or bare id strings. A missing
  *   or non-array value yields an empty list.
  * - `internal_docs` keeps only entries whose id exists in `docs`; entries may
  *   be objects or bare id strings. A missing or non-array value yields the
  *   full `docs` list.
  *
  * @param parsed - Result of `JSON.parse` on the model reply.
  * @param fallbackAnswer - Answer text used when none can be extracted.
  * @param docs - Documents offered to the model; the only valid doc ids.
  * @param availableActions - Actions offered to the model; the only valid
  *   action ids.
  * @returns The normalized result.
  */
 const normalizeResult = (
  parsed: unknown,
  fallbackAnswer: string,
  docs: InternalDocReference[],
  availableActions: RecommendedAction[],
 ): ChatResult => {
  const docsById = new Map(docs.map((doc) => [doc.id, doc]));
  const actionsById = new Map(availableActions.map((action) => [action.id, action]));
  const obj = (parsed && typeof parsed === "object" ? parsed : {}) as Record<string, unknown>;
  return {
    answer: extractAnswer(parsed) ?? fallbackAnswer,
    recommended_actions: Array.isArray(obj.recommended_actions)
      ? (obj.recommended_actions as unknown[]).flatMap((action) => {
          // Loose mode: accept either an object with id, or a bare string id.
          if (typeof action === "string") {
            const configured = actionsById.get(action);
            if (!configured) return [];
            return [{
              type: configured.type ?? ("webhook" as const),
              id: action,
              confidence: 0,
              reason: configured.reason ?? action,
              requires_confirmation: configured.requires_confirmation ?? true,
            }];
          }
          if (!action || typeof action !== "object") return [];
          const candidate = action as Partial<RecommendedAction>;
          if (!candidate.id) return [];
          const configured = actionsById.get(candidate.id);
          if (!configured) return [];
          // Model output is untrusted: it may ask for confirmation on an
          // action configured without one, but it must never switch
          // confirmation off. Confirmation is skipped only when the
          // configured action explicitly says `false` and the model did not
          // ask for it.
          const requiresConfirmation =
            configured.requires_confirmation !== false || candidate.requires_confirmation === true;
          return [{
            type: "webhook" as const,
            id: candidate.id,
            confidence: score(candidate.confidence, 0),
            reason: String(candidate.reason ?? configured.reason ?? "Suggested action"),
            requires_confirmation: requiresConfirmation,
          }];
        })
      : [],
    internal_docs: Array.isArray(obj.internal_docs)
      ? (obj.internal_docs as unknown[]).flatMap((doc) => {
          // Loose mode: accept either an object with id, or a bare string id.
          if (typeof doc === "string") {
            const indexed = docsById.get(doc);
            if (!indexed) return [];
            return [{ id: doc, title: indexed.title, source: indexed.source, relevance: 0 }];
          }
          if (!doc || typeof doc !== "object") return [];
          const candidate = doc as Partial<InternalDocReference>;
          if (!candidate.id) return [];
          const indexed = docsById.get(candidate.id);
          if (!indexed) return [];
          return [{
            id: candidate.id,
            title: String(candidate.title ?? indexed.title),
            source: String(candidate.source ?? indexed.source),
            relevance: score(candidate.relevance, indexed.relevance),
          }];
        })
      : docs,
  };
 };
 /**
  * Parses the raw model message into a {@link ChatResult} without throwing.
  *
  * The outermost `{ ... }` span of the message is parsed as JSON. If that
  * fails, the answer field is salvaged by regex where possible; in that case
  * the result carries no recommended actions and the input `docs` unchanged.
  *
  * @param value - Raw text of the model message.
  * @param docs - Documents offered to the model.
  * @param availableActions - Actions offered to the model.
  * @returns `ok: true` with the normalized result, or `ok: false` with a
  *   structured error (`no_content`, `json_parse` or `schema`).
  */
 const safeJsonParse = (
  value: string,
  docs: InternalDocReference[],
  availableActions: RecommendedAction[],
 ): { ok: true; result: ChatResult } | { ok: false; error: PiChatError } => {
  // Errors only ever carry the first 500 characters of the model output.
  const preview = value ? value.slice(0, 500) : "";
  const parseFailure = (reason: string) => ({
    ok: false as const,
    error: { kind: "json_parse" as const, reason, rawContent: preview },
  });

  if (!value || value.trim() === "") {
    return {
      ok: false,
      error: { kind: "no_content", message: "LLM returned an empty message." },
    };
  }

  const open = value.indexOf("{");
  const close = value.lastIndexOf("}");
  const hasObjectSpan = open !== -1 && close > open;
  if (!hasObjectSpan) {
    return parseFailure("no JSON object delimiters found in response");
  }

  const candidate = value.slice(open, close + 1);
  let decoded: unknown = null;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    // Local models (vLLM / ollama) frequently cut their output mid-array
    // when max_tokens collides with the real context budget, so truncated
    // JSON is an expected failure mode. Recover the answer text by regex
    // before reporting an error.
    const recovered = salvageAnswer(candidate);
    if (recovered === null) {
      return parseFailure("JSON.parse failed on truncated or malformed output");
    }
    return {
      ok: true,
      result: {
        answer: recovered,
        recommended_actions: [],
        internal_docs: docs,
      },
    };
  }

  const isObject = decoded !== null && typeof decoded === "object";
  if (!isObject) {
    return {
      ok: false,
      error: {
        kind: "schema",
        reason: "parsed value is not an object",
        rawContent: preview,
      },
    };
  }
  return { ok: true, result: normalizeResult(decoded, value, docs, availableActions) };
 };
 /**
  * Best-effort regex extraction of the answer field from truncated JSON.
  *
  * Looks for the first key named like "answer" (then common variants seen
  * from local models, with optional quote characters around the key) whose
  * value is a complete, non-blank string literal, and returns that value with
  * its JSON escapes decoded.
  *
  * @param slice - The `{ ... }` span of the model output that failed to parse.
  * @returns The decoded answer text, or `null` when no candidate key holds a
  *   complete non-blank string.
  */
 const salvageAnswer = (slice: string): string | null => {
  const simpleEscapes: Record<string, string> = {
    '"': '"',
    "\\": "\\",
    "/": "/",
    b: "\b",
    f: "\f",
    n: "\n",
    r: "\r",
    t: "\t",
  };
  // Decodes the body of a JSON string literal (the text between the quotes).
  const decode = (raw: string): string => {
    try {
      // The capture never contains an unescaped quote, so wrapping it in
      // quotes gives a JSON string literal whenever the model emitted valid
      // escapes; JSON.parse then handles every escape, including \uXXXX.
      const decoded: unknown = JSON.parse(`"${raw}"`);
      if (typeof decoded === "string") return decoded;
    } catch {
      // Raw control characters or invalid escapes: decode manually below.
    }
    // Single pass, so an escaped backslash is consumed as a unit and can
    // never be re-read as the start of another escape. Unknown escapes are
    // left untouched.
    return raw.replace(/\\(u[0-9a-fA-F]{4}|[\s\S])/g, (whole: string, esc: string) => {
      if (esc.length === 5) return String.fromCharCode(parseInt(esc.slice(1), 16));
      return simpleEscapes[esc] ?? whole;
    });
  };

  // Try the canonical key first, then common variants seen from local models.
  const candidates = ['answer', 'response', 'output', 'text', 'content', 'message', 'result'];
  for (const key of candidates) {
    const re = new RegExp(`["']?${key}["']?\\s*:\\s*"((?:[^"\\\\]|\\\\.)*)"`);
    const raw = slice.match(re)?.[1];
    if (raw === undefined || raw.trim().length === 0) continue;
    return decode(raw);
  }
  return null;
 };
 /**
  * Builds a {@link PiAdapter} backed by an OpenAI-compatible
  * `/chat/completions` endpoint.
  *
  * Each `chat` call sends one non-streaming request whose messages are, in
  * order: the base identity prompt, the skill prompts, the optional
  * per-session system prompt, a JSON context message listing the docs and
  * available actions, the history, and finally the user message.
  *
  * @param options - Endpoint, credentials and defaults.
  * @returns An adapter whose `chat` resolves with `ok: true` when the model
  *   reply could be parsed, and with `ok: false` (structured error plus a
  *   fallback holding the raw text) when it could not.
  * @throws Error `llm_request_failed:<status>` on a non-2xx response. Network
  *   failures and timeouts reject with the error raised by `fetch` or by the
  *   body read.
  */
 export const createOpenAICompatiblePiAdapter = (options: OpenAICompatiblePiAdapterOptions): PiAdapter => ({
  async chat(input) {
    const abortController = new AbortController();
    // One timer bounds the whole exchange. It is cleared only after the body
    // has been read, so a provider that sends headers and then stalls is
    // aborted as well.
    const timeout = setTimeout(() => abortController.abort(), options.timeoutMs ?? 30_000);
    try {
      const response = await fetch(`${options.baseUrl.replace(/\/$/, "")}/chat/completions`, {
        method: "POST",
        headers: {
          authorization: `Bearer ${options.apiKey}`,
          "content-type": "application/json",
        },
        signal: abortController.signal,
        body: JSON.stringify({
          model: input.model || options.defaultModel,
          // Always pin max_tokens to avoid hitting the cap mid-JSON and
          // emitting truncated output. Real providers honor this; servers
          // that cap harder will return a finish_reason of "length" but
          // we'll still get usable text.
          max_tokens: options.maxTokens ?? 4096,
          // Request a single JSON response. Some OpenAI-compatible servers
          // (e.g. local proxies like mr-auto) default to SSE streaming, and
          // `response.json()` below would blow up on the chunked stream.
          // Real providers like MiniMax accept and ignore this flag.
          stream: false,
          // Force JSON output for all OpenAI-compatible backends. Local
          // foundation models (qwen-local behind llm.rikrdo.com) often
          // ignore the "ALWAYS respond with JSON" system instruction and
          // default to natural language — this flag tells the server to
          // constrain the output to a JSON object. Real providers accept
          // and honor it.
          response_format: { type: "json_object" },
          messages: [
            {
              role: "system",
              content:
                "You are SIC (Super Incident Commander), an internal incident management assistant. " +
                "STRICT RULES: " +
                "1) Your reply MUST be a single JSON object (no markdown, no prose wrapper) with EXACTLY three keys: " +
                "   - \"answer\" (string): the response to the user, may include markdown for tables/lists/code. " +
                "   - \"recommended_actions\" (array): each item MUST be an object with keys id (string, present in available_actions), confidence (number 0..1), reason (string), and \"type\": \"webhook\". " +
                "   - \"internal_docs\" (array): each item MUST be an object with keys id, title, source, relevance (number 0..1). " +
                "   Example shape: {\"answer\": \"...\", \"recommended_actions\": [{\"id\":\"vpn-diagnostic\",\"type\":\"webhook\",\"confidence\":0.8,\"reason\":\"matches VPN symptoms\"}], \"internal_docs\": [{\"id\":\"runbooks:vpn\",\"title\":\"VPN Runbook\",\"source\":\"runbooks/vpn.md\",\"relevance\":0.9}]}. " +
                "2) recommended_actions may only include ids present in available_actions; never execute actions and never invent ids. The backend executes with confirmation. " +
                "3) DO NOT invent company names, owners, integrations, customers, or external facts. If the user asks something not backed by internal_docs, available_actions, or the history, say explicitly that you do not have that information. " +
                "4) When asked who you are or what company you belong to, only state that you are SIC (Super Incident Commander), an internal assistant; do not assume an owning company. " +
                "5) Whenever an available_action is contextually relevant to the user's request OR the user asks which actions exist, include its id in recommended_actions so the user can see and execute it from the right panel. The right panel renders ONLY items present in recommended_actions, so omitting them hides them.",
            },
            ...(input.skillPrompts ?? []).map((prompt) => ({
              role: "system" as const,
              content: prompt,
            })),
            ...(input.systemPrompt && input.systemPrompt.trim().length > 0
              ? [{ role: "system" as const, content: input.systemPrompt.trim() }]
              : []),
            {
              role: "system",
              content: JSON.stringify({
                internal_docs: input.docs,
                available_actions: input.availableActions,
              }),
            },
            // Stored `tool` turns are replayed with the `assistant` role.
            ...(input.history ?? []).map((message) => ({
              role: message.role === "tool" ? "assistant" : message.role,
              content: message.content,
            })),
            { role: "user", content: input.message },
          ],
          temperature: 0.2,
        }),
      });
      if (!response.ok) {
        throw new Error(`llm_request_failed:${response.status}`);
      }
      // The body is untrusted: it may be `null`, and `content` may be absent
      // or not a string. Anything that is not a string is treated as an
      // empty message so it surfaces as a structured `no_content` error
      // instead of a TypeError.
      const data = (await response.json()) as OpenAIChatResponse | null;
      const rawContent: unknown = data?.choices?.[0]?.message?.content;
      const content = typeof rawContent === "string" ? rawContent : "";
      const parsed = safeJsonParse(content, input.docs, input.availableActions);
      const usage = extractUsage(data?.usage);
      if (parsed.ok) {
        return { ok: true, result: parsed.result, usage };
      }
      return {
        ok: false,
        error: parsed.error,
        fallback: {
          answer: content,
          recommended_actions: [],
          internal_docs: input.docs,
        },
        usage,
      };
    } finally {
      clearTimeout(timeout);
    }
  },
 });
 /**
  * Converts the provider's snake_case `usage` block into a
  * {@link PiChatUsage}.
  *
  * @param raw - The `usage` field of the chat response, if any.
  * @returns The numeric counters that were present (others are `undefined`),
  *   or `undefined` when `raw` is missing, is not an object, or holds no
  *   numeric counter at all.
  */
 const extractUsage = (raw: OpenAIChatResponse["usage"]): PiChatUsage | undefined => {
  if (!raw || typeof raw !== "object") return undefined;

  // Anything that is not a number is dropped.
  const numberOrUndefined = (candidate: unknown): number | undefined =>
    typeof candidate === "number" ? candidate : undefined;

  const usage = {
    promptTokens: numberOrUndefined(raw.prompt_tokens),
    completionTokens: numberOrUndefined(raw.completion_tokens),
    totalTokens: numberOrUndefined(raw.total_tokens),
    cachedTokens: numberOrUndefined(raw.prompt_tokens_details?.cached_tokens),
  };
  const hasAnyCounter = Object.values(usage).some((counter) => counter !== undefined);
  return hasAnyCounter ? usage : undefined;
 };
--- a/packages/pi-adapter/test/index.test.ts
+++ b/packages/pi-adapter/test/index.test.ts
@@ -0,0 +1,181 @@
 import { describe, expect, it } from "vitest";
 import {
  createOpenAICompatiblePiAdapter,
 } from "../src/index.js";
 describe("pi-adapter structured errors", () => {
  /**
   * Starts a mock LLM endpoint, passes an adapter wired to it to `exercise`,
   * and always stops the server afterwards, even when an assertion throws.
   */
  const withAdapter = async (
    mock: Parameters<typeof startMockLLM>[0],
    exercise: (pi: ReturnType<typeof createOpenAICompatiblePiAdapter>) => Promise<void>,
  ): Promise<void> => {
    const server = await startMockLLM(mock);
    try {
      const pi = createOpenAICompatiblePiAdapter({
        baseUrl: server.baseUrl,
        apiKey: "test",
        defaultModel: "fast",
      });
      await exercise(pi);
    } finally {
      await server.stop();
    }
  };
  // Empty completion text: reported as a structured error with an empty fallback.
  it("returns ok:false with no_content when LLM returns empty", () =>
    withAdapter({ responseContent: "" }, async (pi) => {
      const outcome = await pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      expect(outcome.ok).toBe(false);
      if (outcome.ok) return;
      expect(outcome.error.kind).toBe("no_content");
      expect(outcome.fallback.answer).toBe("");
      expect(outcome.fallback.recommended_actions).toEqual([]);
    }));
  // Non-JSON completion text: the raw text is preserved as the fallback answer.
  it("returns ok:false with json_parse when response has no JSON object", () =>
    withAdapter({ responseContent: "Just plain text answer" }, async (pi) => {
      const outcome = await pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      expect(outcome.ok).toBe(false);
      if (outcome.ok) return;
      expect(outcome.error.kind).toBe("json_parse");
      expect(outcome.fallback.answer).toBe("Just plain text answer");
    }));
  // Happy path: a well-formed JSON payload is surfaced as the result.
  it("returns ok:true when response is well-formed JSON", () => {
    const wellFormed = JSON.stringify({
      answer: "All good",
      recommended_actions: [],
      internal_docs: [],
    });
    return withAdapter({ responseContent: wellFormed }, async (pi) => {
      const outcome = await pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      expect(outcome.ok).toBe(true);
      if (!outcome.ok) return;
      expect(outcome.result.answer).toBe("All good");
    });
  });
  // HTTP-level failures reject the promise instead of producing a structured parse error.
  it("throws on non-OK HTTP response (transport error, not parse error)", () =>
    withAdapter({ status: 500, responseContent: "" }, async (pi) => {
      const pending = pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      await expect(pending).rejects.toThrow(/llm_request_failed:500/);
    }));
  // Only actions offered in `availableActions` survive, and confidence is capped at 1.
  it("filters recommended_actions to known ids and clamps scores", () => {
    const llmPayload = JSON.stringify({
      answer: "ok",
      recommended_actions: [
        { type: "webhook", id: "dns-flush", confidence: 2.5, reason: "x" },
        { type: "webhook", id: "unknown-id", confidence: 0.9, reason: "y" },
      ],
      internal_docs: [],
    });
    return withAdapter({ responseContent: llmPayload }, async (pi) => {
      const outcome = await pi.chat({
        message: "hi",
        model: "fast",
        docs: [],
        availableActions: [
          { type: "webhook", id: "dns-flush", confidence: 0, reason: "r", requires_confirmation: true },
        ],
      });
      expect(outcome.ok).toBe(true);
      if (!outcome.ok) return;
      const kept = outcome.result.recommended_actions;
      expect(kept).toHaveLength(1);
      expect(kept[0]?.id).toBe("dns-flush");
      expect(kept[0]?.confidence).toBe(1);
    });
  });
 });
 import { createServer, type Server } from "node:http";
 /**
  * Starts a throwaway HTTP server that answers every request with a canned
  * OpenAI-style `chat.completion` body.
  *
  * The server binds to an OS-assigned port on 127.0.0.1. The request itself is
  * never inspected: method, path, and body are ignored.
  *
  * @param opts.responseContent - Text placed in `choices[0].message.content`.
  * @param opts.status - HTTP status code of the reply; defaults to 200.
  * @returns `baseUrl` (ending in `/v1`) to point a client at, and `stop()` which
  *   resolves once the server has been closed.
  * @throws Rejects with the underlying socket error when the server cannot bind.
  */
 async function startMockLLM(opts: { responseContent: string; status?: number }): Promise<{
  baseUrl: string;
  stop: () => Promise<void>;
 }> {
  const server = createServer((_req, res) => {
    res.writeHead(opts.status ?? 200, { "content-type": "application/json" });
    res.end(
      JSON.stringify({
        id: "mock",
        object: "chat.completion",
        created: 0,
        model: "fast",
        choices: [
          {
            index: 0,
            message: { role: "assistant", content: opts.responseContent },
            finish_reason: "stop",
          },
        ],
      }),
    );
  });
  return new Promise((resolve, reject) => {
    // An 'error' event with no listener is thrown as an uncaught exception, outside
    // this promise. Listening during bind turns a failed listen() into a rejection
    // that the calling test can report.
    server.once("error", reject);
    server.listen(0, "127.0.0.1", () => {
      // Bind succeeded; the bind-time handler is no longer needed.
      server.off("error", reject);
      const address = server.address();
      // address() is an object for TCP listeners; the 0 fallback only satisfies the type checker.
      const port = typeof address === "object" && address ? address.port : 0;
      resolve({
        baseUrl: `http://127.0.0.1:${port}/v1`,
        stop: () =>
          new Promise<void>((done) => {
            // Best effort: resolve even if close() reports an error (e.g. already stopped).
            server.close(() => done());
          }),
      });
    });
  });
 }
--- a/packages/pi-adapter/tsconfig.json
+++ b/packages/pi-adapter/tsconfig.json
@@ -0,0 +1,8 @@
 {
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src"]
 }
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -0,0 +1,16 @@
 {
  "name": "@pi-chat/shared",
  "private": true,
  "version": "0.1.0",
  "type": "module",
  "exports": {
    ".": "./src/index.ts"
  },
  "scripts": {
    "typecheck": "tsc --noEmit",
    "lint": "tsc --noEmit"
  },
  "devDependencies": {
    "typescript": "^5.8.3"
  }
 }
--- a/packages/shared/src/index.ts
+++ b/packages/shared/src/index.ts
@@ -0,0 +1,33 @@
 /**
  * Identity record describing a user.
  * NOTE(review): which auth path populates this (local login vs. JWT claims)
  * is not visible from this file — confirm against the API auth layer.
  */
 export type AuthUser = {
  /** Identifier of the user. */
  id: string;
  /** Login name; may be absent. */
  username?: string;
  /** E-mail address; may be absent. */
  email?: string;
  /** Role names attached to the user; presumably consulted for authorization — verify against callers. */
  roles: string[];
 };
 /**
  * One action suggested alongside a chat answer.
  */
 export type RecommendedAction = {
  /** Kind of action; `"webhook"` is the only variant declared here. */
  type: "webhook";
  /** Identifier of the action; presumably the webhook's catalog id — verify against the catalog loader. */
  id: string;
  /** Numeric confidence score. NOTE(review): looks like a 0..1 scale — confirm with the producer. */
  confidence: number;
  /** Text accompanying the recommendation; presumably the justification shown to the user — confirm. */
  reason: string;
  /** Flag indicating the action needs confirmation; presumably an explicit user confirmation before it runs — confirm. */
  requires_confirmation: boolean;
  /**
   * Optional soft signal derived from the user's audit history: e.g.
   * "3 runs in last 7d, 100% success". The LLM may use this as a tiebreaker;
   * the UI uses it to show a "Most used" tag.
   */
  usageHint?: string;
 };
 /**
  * Pointer to an internal document attached to a chat result.
  */
 export type InternalDocReference = {
  /** Identifier of the document. */
  id: string;
  /** Title of the document. */
  title: string;
  /** Origin of the document; presumably a path or location within the knowledge base — confirm. */
  source: string;
  /** Numeric relevance score. NOTE(review): scale and ordering are not visible here — confirm with the search code. */
  relevance: number;
 };
 /**
  * Structured outcome of one chat turn: the answer text plus the actions and
  * documents attached to it. Keys are snake_case.
  */
 export type ChatResult = {
  /** Answer text. */
  answer: string;
  /** Actions suggested with the answer; may be empty. */
  recommended_actions: RecommendedAction[];
  /** Internal documents referenced by the answer; may be empty. */
  internal_docs: InternalDocReference[];
 };
--- a/packages/shared/tsconfig.json
+++ b/packages/shared/tsconfig.json
@@ -0,0 +1,8 @@
 {
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src"]
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -0,0 +1,3 @@
 # Workspace members: every directory directly under apps/ and packages/.
 packages:
  - "apps/*"
  - "packages/*"
--- a/tsconfig.base.json
+++ b/tsconfig.base.json
@@ -0,0 +1,12 @@
 {
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "Bundler",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true
  }
 }
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -0,0 +1,10 @@
 import { defineConfig } from "vitest/config";
 /** Globs of the test files collected by this config: the API app and the pi-adapter package. */
 const testFileGlobs = ["apps/api/test/**/*.test.ts", "packages/pi-adapter/test/**/*.test.ts"];
 /** Per-test timeout in milliseconds (20 seconds). */
 const perTestTimeoutMs = 20_000;
 /** Vitest configuration: Node environment, `forks` pool. */
 export default defineConfig({
  test: {
    include: testFileGlobs,
    environment: "node",
    testTimeout: perTestTimeoutMs,
    pool: "forks",
  },
 });