Initial commit: SIC harness (backend, web, pi-adapter, configs, docs)

- pnpm monorepo: apps/api (Fastify + SQLite + SSE), apps/web (React+Vite), packages/shared, packages/pi-adapter
- Local auth (admin/webhook-runner roles) + Keycloak JWT ready
- Multi-session chat with reliable history (user persisted before LLM, assistant persisted after stream)
- Markdown knowledge base with /api/docs/search + /api/docs/:id
- YAML webhook catalog with backend-only execution, retry/backoff, audit (webhook_runs), and per-user rate limit
- Skills config (sre-on-call, blameless-postmortem, security-incident) injected into LLM system prompt
- LLM provider failover chain (config/models.yml fallback + LLM_FALLBACK_CHAIN override)
- Context-aware webhooks panel + backend id-mention safety net
- Per-message stats (time/duration/tokens/model), Markdown+GFM render, code & table copy/download buttons
- Vitest suite, end-to-end smoke test (scripts/smoke.mjs), per-session system prompt override
- /metrics Prometheus endpoint + /api/metrics JSON, request-id correlation
- dotenv with explicit repo-root path; envString/envNumber helpers (handles empty-string env)
- Runbooks + SOPs under knowledge/ in English; README, docs, and INDEX.md in English
This commit is contained in:
2026-06-29 16:20:53 +02:00
commit 62728b2200
89 changed files with 11992 additions and 0 deletions

118
.env.example Normal file
View File

@@ -0,0 +1,118 @@
# =============================================================
# SIC — Super Incident Commander
# Local development environment.
#
# How to use:
# 1. Copy this file to `.env`:
# cp .env.example .env
# 2. Edit `.env` and fill in the secrets (at minimum MINIMAX_API_KEY).
# 3. Start the API:
# pnpm dev
#
# The API loads `.env` automatically via dotenv at boot. Real shell
# environment variables always win over the file, so production /
# docker setups that inject env vars keep working unchanged.
#
# Never commit a real `.env` file — it's gitignored.
# =============================================================
# ---------------------------------------------------------------
# Server
# ---------------------------------------------------------------
API_PORT=8787
HOST=0.0.0.0
API_BODY_LIMIT_BYTES=1048576
CORS_ALLOWED_ORIGINS=
WEB_PORT=3000
WEB_VITE_API_PROXY=http://localhost:8787
# ---------------------------------------------------------------
# Auth
# ---------------------------------------------------------------
# local: dev mode, returns a synthetic `local-user` with admin + webhook-runner roles
# keycloak: validates Authorization: Bearer JWT against OIDC_ISSUER/OIDC_AUDIENCE
AUTH_MODE=local
OIDC_ISSUER=https://auth.rikrdo.com/realms/homelab
OIDC_AUDIENCE=pi-chat
# ---------------------------------------------------------------
# Persistence
# ---------------------------------------------------------------
DATABASE_URL=sqlite://./data/pi-chat.db
# ---------------------------------------------------------------
# LLM provider (OpenAI-compatible)
# ---------------------------------------------------------------
# Default provider and base URL. The MiniMax and mr-auto model
# entries in config/models.yml both read these.
LLM_BASE_URL=https://api.minimax.io/v1
# Global fallback-chain override. When set, it replaces the per-model chains
# parsed from config/models.yml. Comma-separated model ids in the order to try.
# Leave empty to disable the override and use the YAML-defined chains.
# Example: LLM_FALLBACK_CHAIN=balanced,mr-auto
LLM_FALLBACK_CHAIN=
LLM_API_KEY=
DEFAULT_MODEL=fast
# Backwards-compat alias for the MiniMax key. Either this or LLM_API_KEY works.
# Used by chat routes as a fallback when LLM_API_KEY is empty.
MINIMAX_API_KEY=
# Per-model API key overrides (config/models.yml -> model.api_key_env).
# Only the mr-auto model needs this; MiniMax shares LLM_API_KEY.
MR_AUTO_API_KEY=
LLM_TIMEOUT_MS=30000
# ---------------------------------------------------------------
# Chat input limits
# ---------------------------------------------------------------
CHAT_MESSAGE_MAX_CHARS=8000
# ---------------------------------------------------------------
# Rate limits
# ---------------------------------------------------------------
# /api/chat/stream — per authenticated user
CHAT_RATE_LIMIT_PER_MINUTE=20
CHAT_RATE_LIMIT_BURST=5
# POST /api/webhooks/:id/run — per webhook id (across all users)
WEBHOOK_RATE_LIMIT_PER_MINUTE=60
WEBHOOK_RATE_LIMIT_BURST=10
# ---------------------------------------------------------------
# Webhook execution
# ---------------------------------------------------------------
WEBHOOK_TIMEOUT_MS=15000
WEBHOOK_RETRY_MAX_ATTEMPTS=3
WEBHOOK_RETRY_INITIAL_BACKOFF_MS=500
WEBHOOK_RETRY_MAX_BACKOFF_MS=5000
# Audit retention
WEBHOOK_RUNS_RETENTION_DAYS=30
WEBHOOK_RUNS_MAX_PER_USER=1000
WEBHOOK_AUDIT_PURGE_INTERVAL_MS=3600000
# Webhook usage stats window (days) for /api/webhooks/usage
WEBHOOK_USAGE_WINDOW_DAYS=7
# ---------------------------------------------------------------
# Config file paths
# ---------------------------------------------------------------
# Each config file can be overridden with an env var. Defaults
# resolve relative to apps/api (where the API is started).
MODELS_CONFIG_PATH=
WEBHOOKS_CONFIG_PATH=
SKILLS_CONFIG_PATH=
KNOWLEDGE_DIR=
N8N_CONFIG_PATH=
RAG_CONFIG_PATH=
MCP_CONFIG_PATH=
# ---------------------------------------------------------------
# Development
# ---------------------------------------------------------------
# Set to `1` to enable verbose Fastify logging.
DEBUG=0

39
.gitignore vendored Normal file
View File

@@ -0,0 +1,39 @@
node_modules/
dist/
.env
.env.local
data/*.db
data/*.db-*
**/data/*.db
**/data/*.db-*
**/data/*.db-shm
**/data/*.db-wal
*.log
# Local agent / codebase caches
.atl/
.codebase-memory/
INDEX.md
scripts/*.mjs
!scripts/*.mjs.bak
# Editor / OS
.DS_Store
.idea/
.vscode/
# Build artifacts
apps/api/dist/
apps/web/dist/
packages/*/dist/
# Logs
logs/
*.log
*.pid
*.seed
*.pid.lock
# Coverage
coverage/
.nyc_output/

273
README.md Normal file
View File

@@ -0,0 +1,273 @@
# SIC — Super Incident Commander
Lightweight web harness to use a centralized `pi.dev` engine from the browser, with independent sessions, reliable history in SQLite, internal Markdown documentation, and webhooks executed only from the backend after explicit user confirmation.
## MVP scope
- Expected ceiling: 5 concurrent users.
- Frontend: React + Vite.
- Backend: Node.js + Fastify.
- Initial persistence: SQLite.
- LLM: OpenAI-compatible endpoint via `pi-adapter`.
- Default LLM provider: MiniMax OpenAI-compatible.
- Configuration: YAML + environment variables.
- Initial deploy: Docker Compose.
## Reliability principle
Nothing critical lives only in memory. Sessions, messages, and webhook audit are rebuilt from SQLite.
Every conversation read/write must respect:
```sql
WHERE session_id = ?
AND user_id = ?
```
## Structure
```text
apps/
api/ # Fastify API, SSE, sessions, webhooks, docs
web/ # React + Vite UI
packages/
shared/ # Shared types
pi-adapter/ # pi.dev / OpenAI-compatible adapter
config/ # YAML for models, webhooks and docs
knowledge/ # Internal Markdown documentation
deploy/ # Docker Compose and future manifests
docs/ # Definitions, reliable history and agents
scripts/ # End-to-end smoke test + mock LLM
```
## API surface
- `GET /healthz`
- `GET /readyz`
- `GET /api/version`
- `GET /api/me`
- `GET /api/sessions`
- `POST /api/sessions`
- `GET /api/sessions/:id`
- `PATCH /api/sessions/:id`
- `DELETE /api/sessions/:id`
- `GET /api/docs/search?q=vpn`
- `GET /api/docs/:id`
- `GET /api/models`
- `GET /api/webhooks`
- `GET /api/webhook-runs?sessionId=...`
- `POST /api/webhooks/:id/run`
- `GET /api/skills`
- `PATCH /api/sessions/:id/system-prompt` — set per-session context
- `GET /metrics` — Prometheus text
- `GET /api/metrics` — same as JSON
- `POST /api/chat/stream`
## Chat stream contract
`POST /api/chat/stream` takes `sessionId`, `message` and optionally `model`.
Reliability rules:
1. Validate that the session belongs to the current user.
2. Persist the `user` message before calling the LLM.
3. If the session has no title yet, derive a short one from the first message.
4. Validate the requested model against `config/models.yml`.
5. Search relevant Markdown docs and role-allowed webhooks.
6. Call the OpenAI-compatible endpoint via `pi-adapter`. If the model has a fallback chain, the chat route walks it on structured or transport errors; the first `ok=true` response wins.
7. Emit SSE events: `docs`, `token`, `actions`, `done`.
8. Persist the `assistant` response; if every model in the chain fails, persist a controlled message with error metadata and the full failure trail.
### Provider fallback
Each model in `config/models.yml` can declare `fallback: [other-id, ...]`. The chat route walks the chain when a model returns `ok=false` (no_content / json_parse / schema) or throws (5xx / 429 / network / timeout). When the assistant metadata is persisted, it includes `requested_model`, `fallback_attempts`, `fallback_chain`, and `fallback_failures` whenever the chain was actually used, so you can see what happened in the chat history.
Override the chain globally with `LLM_FALLBACK_CHAIN` (comma-separated ids, tried in order after the requested model). Leave empty to use each model's YAML chain.
Default chain today (from `config/models.yml`):
- `fast` → no fallback (it IS the cheap path)
- `balanced` → `mr-auto`
- `reasoning` → no fallback
- `mr-auto` → no fallback
## MiniMax
The project is wired to MiniMax via the official OpenAI-compatible endpoint:
- Base URL: `https://api.minimax.io/v1`
- Chat path used by the adapter: `/chat/completions`
- Auth: `Authorization: Bearer <key>`
Models configured in `config/models.yml`:
- `fast` → `MiniMax-M2.7-highspeed`
- `balanced` → `MiniMax-M2.7`
- `reasoning` → `MiniMax-M3`
To run locally, set the key:
```bash
export MINIMAX_API_KEY="your-key"
export LLM_BASE_URL="https://api.minimax.io/v1"
export LLM_API_KEY="$MINIMAX_API_KEY"
export DEFAULT_MODEL="fast"
```
In Docker Compose you only need to export `MINIMAX_API_KEY`; the Compose file maps it to `LLM_API_KEY`.
## UI MVP
The React app already consumes the API through the Vite proxy:
- Loads or creates a local session.
- Loads `GET /api/models` and lets the user pick the model per message.
- Lists persisted sessions and lets the user switch between them.
- Lets the user rename and delete sessions, always through the API with per-user isolation.
- Sends messages to `POST /api/chat/stream` and consumes SSE events.
- Shows recommended documentation and lets the user open the full document via `GET /api/docs/:id`.
- Shows suggested actions in the right panel.
- Loads `GET /api/webhooks` to show public labels/descriptions for actions.
- Executes webhooks only after user confirmation and always through the backend.
- Shows execution audit per session from `GET /api/webhook-runs`, without exposing URLs or payload templates.
- Can attach a development Bearer token to test `AUTH_MODE=keycloak`; reads from `localStorage` or `VITE_AUTH_TOKEN`.
## Skills
Skills are persona/behavior prompt fragments loaded from `config/skills.yml` and injected into the LLM's system prompt at chat time. They are NOT capabilities: the model still only recommends actions and the backend still owns execution.
Each skill has: `id`, `name`, `description`, `enabled`, `prompt`. Skills with `enabled: true` are injected into the chat system prompt (after the base identity prompt, before the docs/actions context). Skills with `enabled: false` are kept in the file but inactive. The frontend can list them via `GET /api/skills` (no prompt text is exposed publicly — only id, name, description, enabled).
Edit `config/skills.yml` and restart the API to change the active skill set. The default file ships with `sre-on-call` and `blameless-postmortem` enabled; `security-incident` is shipped disabled as a reference.
The env var `SKILLS_CONFIG_PATH` overrides the default config path (`../../config/skills.yml` relative to `cwd`).
## Per-session context
Every session has an optional `system_prompt` field. When set, it is prepended to every chat turn as a system message (after the base identity prompt and skill prompts, before the docs/actions context). Use it to pin incident id, on-call name, or runbook references that shouldn't drift across the conversation.
- **Frontend**: each session row has a small circle button (`○` empty, `●` set). Click it to open a modal editor with Save and Clear.
- **API**: `PATCH /api/sessions/:id/system-prompt` with `{ "system_prompt": "..." }`. Send `null` or empty string to clear.
- **Limit**: 8000 characters.
- **Persistence**: stored in `chat_sessions.system_prompt`; same `WHERE id = ? AND user_id = ?` ownership rule as every other session operation.
## Observability
Two endpoints surface API metrics:
- `GET /metrics` — Prometheus text exposition (counter / summary), scraper-friendly. Default Prometheus port / scrape target.
- `GET /api/metrics` — same data as JSON for humans and the smoke test. Shape:
```json
{
"started_at": "2026-06-29T12:00:00.000Z",
"uptime_seconds": 1234,
"totals": { "requests": 5678, "errors_5xx": 0 },
"routes": [
{
"route": "/api/chat/stream",
"method": "POST",
"count": 42,
"avg_ms": 1230,
"p95_ms": 4500,
"max_ms": 8000,
"status_buckets": { "200_299": 42 }
}
],
"recent": [
{
"route": "/api/sessions/:id",
"method": "DELETE",
"status": 204,
"durationMs": 4,
"timestamp": 1782727300000
}
]
}
```
Routes are aggregated by route **template** (e.g. `/api/sessions/:id`), not by raw URL, so `/api/sessions/abc` and `/api/sessions/def` share a bucket. p95 uses a fixed-size streaming reservoir (200 samples) so memory stays bounded under traffic. In-memory only — counters reset on restart; that's the expected behavior for a 5-user MVP.
## Auth
The backend supports two modes:
- `AUTH_MODE=local`: dev mode, uses `local-user` with roles `admin` and `webhook-runner`.
- `AUTH_MODE=keycloak`: validates `Authorization: Bearer <token>` with remote JWKS from `OIDC_ISSUER` and `OIDC_AUDIENCE`.
For manual Keycloak testing, the UI lets you paste a JWT in the "Dev token" box. That token is stored in `localStorage` and sent as `Authorization: Bearer <token>` on API and stream calls. Alternatively, Vite can receive `VITE_AUTH_TOKEN` to preconfigure it for the local environment.
Claims used from Keycloak:
- `sub` as `user.id`.
- `preferred_username` and `email` for display.
- Roles from `realm_access.roles` and `resource_access[OIDC_AUDIENCE].roles`.
## Basic hardening
- `API_BODY_LIMIT_BYTES`: global Fastify body limit. Default: `1048576`.
- `CHAT_MESSAGE_MAX_CHARS`: chat message and `lastUserMessage` limit on webhooks. Default: `8000`.
- `CORS_ALLOWED_ORIGINS`: comma-separated list. If unset, open for dev.
- `LLM_TIMEOUT_MS`: OpenAI-compatible call timeout. Default: `30000`.
- `WEBHOOK_TIMEOUT_MS`: backend-only webhook execution timeout. Default: `15000`.
- `WEBHOOK_RETRY_MAX_ATTEMPTS`: retries per webhook on transient errors (5xx, 429, timeout, network). Default: `3`.
- `WEBHOOK_RETRY_INITIAL_BACKOFF_MS`: initial backoff with exponential growth. Default: `500`.
- `WEBHOOK_RETRY_MAX_BACKOFF_MS`: backoff cap. Default: `5000`.
- `WEBHOOK_RUNS_RETENTION_DAYS`: age cutoff for `webhook_runs` rows. Runs older than this are purged on boot and on a timer. Default: `30`. Set to `0` to disable the age pass.
- `WEBHOOK_RUNS_MAX_PER_USER`: keep at most this many most-recent runs per user. The oldest overflow is purged. Default: `1000`. Set to `0` to disable the cap pass.
- `WEBHOOK_AUDIT_PURGE_INTERVAL_MS`: how often the janitor runs while the API is up. Default: `3600000` (1 hour). Minimum: `60000` (1 minute).
- `CHAT_RATE_LIMIT_PER_MINUTE`: per-user rate limit on `POST /api/chat/stream` (token-bucket refill rate). Default: `20`.
- `CHAT_RATE_LIMIT_BURST`: per-user burst size. Default: `5`. Rejected calls return `429` with `retry-after` in seconds and `x-ratelimit-remaining: 0`.
- The API adds basic defensive headers: `x-content-type-options`, `referrer-policy`, `x-frame-options`.
## End-to-end smoke test
A smoke script exercises the full API (health, auth, models, docs, webhooks, sessions, SSE stream, message persistence and audit).
### With a real LLM (MiniMax)
```bash
# Terminal 1: start the API and the web
export LLM_BASE_URL=https://api.minimax.io/v1
export LLM_API_KEY="$MINIMAX_API_KEY"
export DEFAULT_MODEL=fast
pnpm dev
# Terminal 2: smoke test against http://localhost:3000
pnpm smoke
```
### With the mock LLM (no key)
```bash
# Terminal 1: start the API and the web pointing at the mock
pnpm mock:llm &
export LLM_BASE_URL=http://127.0.0.1:4010/v1
export LLM_API_KEY=dummy
export DEFAULT_MODEL=fast
pnpm dev
# Terminal 2
pnpm smoke
# or in a single step, the script starts the mock internally:
pnpm smoke:mock
```
Steps covered (in order):
1. `/healthz`, `/readyz`
2. `/api/me` (local auth)
3. `/api/models`
4. `/api/docs/search` + `/api/docs/:id`
5. `/api/webhooks`
6. `POST /api/sessions` + `GET /api/sessions`
7. `POST /api/chat/stream` and SSE event parsing (`docs`, `token`, `actions`, `done`)
8. `GET /api/sessions/:id` to confirm the assistant message was persisted
9. `GET /api/webhook-runs?sessionId=...` to confirm audit listing
10. `DELETE /api/sessions/:id` (cleanup)
Optional flags:
- `pnpm smoke --api-base http://localhost:4000` to point at a different API
- `pnpm smoke:mock` (alias of `pnpm smoke --mock-llm`) starts the mock inside the script

28
apps/api/package.json Normal file
View File

@@ -0,0 +1,28 @@
{
"name": "@pi-chat/api",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"dev": "tsx watch src/server.ts",
"typecheck": "tsc --noEmit",
"lint": "tsc --noEmit"
},
"dependencies": {
"@fastify/cors": "^11.0.1",
"@pi-chat/pi-adapter": "workspace:*",
"@pi-chat/shared": "workspace:*",
"better-sqlite3": "^11.10.0",
"dotenv": "^17.4.2",
"fastify": "^5.3.3",
"jose": "^5.10.0",
"yaml": "^2.7.1",
"zod": "^3.25.67"
},
"devDependencies": {
"@types/better-sqlite3": "^7.6.13",
"@types/node": "^22.15.32",
"tsx": "^4.20.3",
"typescript": "^5.8.3"
}
}

View File

@@ -0,0 +1,76 @@
import type { AuthUser } from "@pi-chat/shared";
import type { FastifyRequest } from "fastify";
import { createRemoteJWKSet, jwtVerify } from "jose";
import { envString } from "../env.js";
/**
 * Subset of the Keycloak token claims this module reads after verification.
 * Every field is optional; `getKeycloakUser` rejects tokens that lack `sub`.
 */
type KeycloakClaims = {
// Subject identifier; becomes the user's `id`.
sub?: string;
// Display name; copied to the user's `username`.
preferred_username?: string;
// Copied to the user's `email`.
email?: string;
// Realm-wide roles.
realm_access?: { roles?: string[] };
// Per-client roles keyed by client id; only the entry matching the configured audience is read.
resource_access?: Record<string, { roles?: string[] }>;
};
// Environment-backed settings are exposed as functions so each call re-reads
// `process.env`; the fallback handling itself is delegated to `envString`.

/** Auth mode from `AUTH_MODE`, with `"local"` as the fallback. */
const authMode = () => {
  return envString(process.env.AUTH_MODE, "local");
};

/** OIDC issuer URL from `OIDC_ISSUER`, with the homelab realm as the fallback. */
const oidcIssuer = () => {
  return envString(process.env.OIDC_ISSUER, "https://auth.rikrdo.com/realms/homelab");
};

/** Expected token audience from `OIDC_AUDIENCE`, with `"pi-chat"` as the fallback. */
const oidcAudience = () => {
  return envString(process.env.OIDC_AUDIENCE, "pi-chat");
};

/** Remote JWKS resolver; created on first Keycloak verification and then reused. */
let jwks: ReturnType<typeof createRemoteJWKSet> | undefined;
/**
 * Builds the synthetic user returned in local auth mode.
 *
 * @returns A fresh user object with id/username `local-user` and the
 *          `admin` and `webhook-runner` roles.
 */
const getLocalUser = (): AuthUser => {
  const localIdentity = "local-user";
  return {
    id: localIdentity,
    username: localIdentity,
    roles: ["admin", "webhook-runner"],
  };
};
/**
 * Extracts the bearer token from the request's `Authorization` header.
 *
 * The scheme name is compared case-insensitively (HTTP authentication schemes
 * are case-insensitive), and a header that carries the scheme but no token is
 * rejected here rather than handed to the JWT verifier as an empty string.
 *
 * @param request Incoming Fastify request.
 * @returns The token with surrounding whitespace removed.
 * @throws Error `auth_missing_bearer_token` when the header is absent, uses
 *         another scheme, or contains no token.
 */
const bearerTokenFrom = (request: FastifyRequest) => {
  const scheme = "bearer ";
  const header = request.headers.authorization;
  if (typeof header !== "string" || header.slice(0, scheme.length).toLowerCase() !== scheme) {
    throw new Error("auth_missing_bearer_token");
  }
  const token = header.slice(scheme.length).trim();
  if (token === "") {
    throw new Error("auth_missing_bearer_token");
  }
  return token;
};
/**
 * Collects the roles granted by a token: realm roles first, then the roles of
 * the client whose id equals the configured audience, with duplicates removed.
 *
 * @param claims Claims taken from the token payload.
 * @returns De-duplicated role names in first-seen order.
 */
const rolesFromClaims = (claims: KeycloakClaims) => {
  const clientId = oidcAudience();
  const merged = new Set<string>(claims.realm_access?.roles ?? []);
  for (const role of claims.resource_access?.[clientId]?.roles ?? []) {
    merged.add(role);
  }
  return Array.from(merged);
};
/**
 * Authenticates a request against Keycloak by verifying its bearer JWT.
 *
 * The JWKS resolver is created on first use from
 * `<issuer>/protocol/openid-connect/certs` and then kept in the module-level
 * `jwks` variable for later calls.
 *
 * @param request Incoming Fastify request carrying the `Authorization` header.
 * @returns The user described by the token's claims.
 * @throws Error `auth_missing_subject` when the verified token has no `sub`;
 *         errors from `bearerTokenFrom` and `jwtVerify` propagate unchanged.
 */
const getKeycloakUser = async (request: FastifyRequest): Promise<AuthUser> => {
  const issuer = oidcIssuer();
  const audience = oidcAudience();

  const keySet = (jwks ??= createRemoteJWKSet(new URL(`${issuer}/protocol/openid-connect/certs`)));
  const token = bearerTokenFrom(request);
  const verified = await jwtVerify(token, keySet, { issuer, audience });

  const claims = verified.payload as KeycloakClaims;
  const subject = claims.sub;
  if (!subject) {
    throw new Error("auth_missing_subject");
  }

  return {
    id: subject,
    username: claims.preferred_username,
    email: claims.email,
    roles: rolesFromClaims(claims),
  };
};
/**
 * Resolves the authenticated user for a request according to the configured
 * auth mode.
 *
 * @param request Incoming Fastify request.
 * @returns The synthetic local user in `local` mode, or the Keycloak-verified
 *          user in `keycloak` mode.
 * @throws Error `auth_mode_not_supported:<mode>` for any other mode.
 */
export const getAuthUser = async (request: FastifyRequest): Promise<AuthUser> => {
  const mode = authMode();
  switch (mode) {
    case "local":
      return getLocalUser();
    case "keycloak":
      return getKeycloakUser(request);
    default:
      throw new Error(`auth_mode_not_supported:${mode}`);
  }
};

View File

@@ -0,0 +1,6 @@
import type { FastifyInstance } from "fastify";
import { getAuthUser } from "./index.js";
/**
 * Registers `GET /api/me`, which responds with `{ user }` for the caller.
 *
 * @param app Fastify instance the route is attached to.
 */
export const registerAuthRoutes = async (app: FastifyInstance) => {
  app.get("/api/me", async (request) => {
    const user = await getAuthUser(request);
    return { user };
  });
};

355
apps/api/src/chat/routes.ts Normal file
View File

@@ -0,0 +1,355 @@
import { createOpenAICompatiblePiAdapter } from "@pi-chat/pi-adapter";
import type { InternalDocReference, RecommendedAction } from "@pi-chat/shared";
import type { FastifyInstance, FastifyReply } from "fastify";
import { z } from "zod";
import { getAuthUser } from "../auth/index.js";
import type { AppDatabase } from "../db/database.js";
import { createDocsRepository, type KnowledgeSearchResult } from "../docs/repository.js";
import { findModelDefinition, getDefaultModelId, resolveFallbackChain, resolveModelApiKey } from "../models/config.js";
import { envNumber } from "../env.js";
import { chatRateLimiterFromEnv } from "../rate-limit.js";
import { getEnabledSkillPrompts } from "../skills/config.js";
import { createMessageRepository, createSessionRepository } from "../sessions/repository.js";
import { createWebhookAuditRepository } from "../webhooks/audit.js";
import { canUseWebhook, loadWebhookDefinitions } from "../webhooks/config.js";
// Request-body schema for POST /api/chat/stream.
// Both the message length cap (CHAT_MESSAGE_MAX_CHARS, fallback 8_000) and the
// default model id are evaluated once, when this module is loaded — not per request.
const chatStreamBody = z.object({
sessionId: z.string().min(1),
// Trimmed before the length checks, so a whitespace-only message is rejected.
message: z.string().trim().min(1).max(envNumber(process.env.CHAT_MESSAGE_MAX_CHARS, 8_000)),
// Optional in the request; falls back to the default model id.
model: z.string().trim().default(getDefaultModelId()),
});
/**
 * Writes one Server-Sent Events frame to the raw response.
 *
 * @param reply Fastify reply whose underlying stream receives the frame.
 * @param event Payload serialized as JSON into the frame's `data:` field.
 */
const sendEvent = (reply: FastifyReply, event: unknown) => {
  const frame = "data: " + JSON.stringify(event) + "\n\n";
  reply.raw.write(frame);
};
/**
 * Streams an already-complete answer as a sequence of `token` events.
 * Each token is a run of non-whitespace characters plus the whitespace that
 * follows it; an answer with no non-whitespace characters emits nothing.
 *
 * @param reply Fastify reply the events are written to.
 * @param answer Full answer text to split and emit.
 */
const sendAnswerTokens = (reply: FastifyReply, answer: string) => {
  const pieces = answer.match(/\S+\s*/g);
  if (pieces === null) {
    return;
  }
  pieces.forEach((token) => {
    sendEvent(reply, { type: "token", token });
  });
};
/**
 * Narrows a knowledge-base search hit to the reference fields passed on to the
 * LLM adapter: `id`, `title`, `source` and `relevance`.
 *
 * @param doc Search result from the docs repository.
 * @returns A new object holding only the four reference fields.
 */
const toDocReference = ({ id, title, source, relevance }: KnowledgeSearchResult): InternalDocReference => {
  return { id, title, source, relevance };
};
/**
 * Tells whether a session still needs an automatically derived title.
 *
 * @param title Current session title.
 * @returns `true` when the title is missing, empty, or the placeholder
 *          `"New session"`.
 */
const shouldAutoTitle = (title: string | null) => {
  if (title == null || title === "") {
    return true;
  }
  return title === "New session";
};
/**
 * Derives a short session title from the first user message.
 *
 * Whitespace runs are collapsed to single spaces and the ends trimmed. Titles
 * longer than 48 characters are cut to 45 characters plus `...`.
 *
 * Length is measured in Unicode code points rather than UTF-16 units, so the
 * cut can never land in the middle of a surrogate pair (e.g. an emoji) and
 * leave a lone surrogate in the stored title. Multi-code-point grapheme
 * clusters can still be split.
 *
 * @param message Raw user message.
 * @returns The normalized, possibly truncated title.
 */
const titleFromMessage = (message: string) => {
  const normalized = message.replace(/\s+/g, " ").trim();
  // Array.from iterates a string by code point, keeping surrogate pairs intact.
  const codePoints = Array.from(normalized);
  return codePoints.length > 48 ? `${codePoints.slice(0, 45).join("")}...` : normalized;
};
/**
 * Matches a webhook id against a chunk of text.
 *
 * Accepts the full id (with `-` or `_`) OR its first word as a whole word
 * (e.g. id "dns-flush" matches "dns" in "flush the dns cache"). Both sides are
 * lowercased and runs of dashes/underscores are turned into a single space
 * before comparing. First words shorter than 3 characters are ignored.
 *
 * The first word is regex-escaped before being compiled, so ids containing
 * metacharacters neither throw (`c++`) nor over-match (`a.c` vs "abc"). An id
 * that is empty or made only of separators never matches; without that guard
 * `includes("")` would match every text.
 *
 * @param text Free text to scan (user message or model answer).
 * @param id Webhook id to look for.
 * @returns `true` when the id or its first word is mentioned in `text`.
 */
const matchesId = (text: string, id: string): boolean => {
  const normalized = text.toLowerCase().replace(/[-_]+/g, " ");
  const normalizedId = id.toLowerCase().replace(/[-_]+/g, " ");
  if (normalizedId.trim() === "") return false;
  if (normalized.includes(normalizedId)) return true;
  const firstWord = normalizedId.split(" ")[0] ?? "";
  if (firstWord.length < 3) return false;
  // Escape regex metacharacters so the id is matched literally.
  const escapedFirstWord = firstWord.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`\\b${escapedFirstWord}\\b`, "i").test(normalized);
};
/**
 * Adds to the model's recommended actions every available action whose id is
 * mentioned in either the user's message or the model's answer.
 *
 * @param userMessage Text the user sent.
 * @param answer Text the model produced.
 * @param recommended Actions the model recommended; kept first and unchanged.
 * @param available Actions the user is allowed to run.
 * @returns A new array: `recommended` followed by the mention-derived actions,
 *          with each action id appearing at most once among the additions.
 */
const enrichActionsWithMentions = (
  userMessage: string,
  answer: string,
  recommended: RecommendedAction[],
  available: RecommendedAction[],
): RecommendedAction[] => {
  const knownIds = new Set(recommended.map((action) => action.id));
  const result = recommended.slice();

  available.forEach((candidate) => {
    if (knownIds.has(candidate.id)) {
      return;
    }
    const mentioned = matchesId(userMessage, candidate.id) || matchesId(answer, candidate.id);
    if (!mentioned) {
      return;
    }
    // Mention-derived actions get a fixed confidence of 0.4 and a generic
    // reason so they can be told apart from the model's own recommendations.
    result.push({
      ...candidate,
      confidence: 0.4,
      reason: "Mentioned in the conversation",
    });
    knownIds.add(candidate.id);
  });

  return result;
};
/**
 * Registers `POST /api/chat/stream`, the SSE chat endpoint.
 *
 * Per request, in order: authenticate, apply the per-user rate limit, validate
 * the body, session and model, open the SSE stream, persist the user message,
 * search docs, walk the model fallback chain, stream the answer, and persist
 * the assistant message (or a controlled error message when the LLM phase
 * throws).
 *
 * @param app Fastify instance the route is attached to.
 * @param db Database handle used to build the session, message and webhook-audit repositories.
 */
export const registerChatRoutes = async (app: FastifyInstance, db: AppDatabase) => {
const sessions = createSessionRepository(db);
const messages = createMessageRepository(db);
const docs = createDocsRepository();
const audit = createWebhookAuditRepository(db);
// One limiter instance shared by every request to this route.
const rateLimiter = chatRateLimiterFromEnv();
app.post("/api/chat/stream", async (request, reply) => {
const user = await getAuthUser(request);
// Rate limiting is keyed by user id and runs before the body is validated.
const decision = rateLimiter.consume(user.id);
if (!decision.ok) {
// Report at least 1 second so clients never see `retry-after: 0`.
const retryAfterSec = Math.max(1, Math.ceil(decision.retryAfterMs / 1000));
app.log.warn({ user: user.id, retryAfterSec }, "chat rate limit exceeded");
return reply
.code(429)
.header("retry-after", String(retryAfterSec))
.header("x-ratelimit-remaining", "0")
.send({
error: "rate_limited",
retry_after_ms: decision.retryAfterMs,
});
}
reply.header("x-ratelimit-remaining", String(decision.remaining));
// `parse` throws on an invalid body; how that surfaces as an HTTP response
// depends on the app's error handling, which is not visible in this file.
const body = chatStreamBody.parse(request.body);
// The session lookup takes the user id as well as the session id.
const session = sessions.get(user.id, body.sessionId);
const selectedModel = findModelDefinition(body.model);
if (!session) {
return reply.code(404).send({ error: "session_not_found" });
}
if (!selectedModel) {
return reply.code(400).send({ error: "model_not_found" });
}
const resolvedKey = resolveModelApiKey(selectedModel);
// Only key lengths are logged, never the key values.
app.log.debug(
{
model: selectedModel.id,
keyLen: resolvedKey.length,
llmKeyLen: (process.env.LLM_API_KEY ?? "").length,
},
"llm api key resolved",
);
// Build the ordered fallback chain starting at the selected model. The
// adapter is built fresh per model because base URL, key and provider
// model name can differ across chain entries.
const chain = resolveFallbackChain(selectedModel.id);
// Chain ids without a matching model definition are dropped silently.
const chainModels = chain
.map((id) => findModelDefinition(id))
.filter((m): m is NonNullable<ReturnType<typeof findModelDefinition>> => Boolean(m));
const adapters = chainModels.map((model) => ({
model,
pi: createOpenAICompatiblePiAdapter({
baseUrl: model.base_url || process.env.LLM_BASE_URL || "https://api.minimax.io/v1",
apiKey: resolveModelApiKey(model),
defaultModel: model.model,
maxTokens: model.max_tokens,
}),
}));
// From here on the response is written directly to the raw Node response.
// NOTE(review): headers set earlier through reply.header(...) (such as
// x-ratelimit-remaining) are not part of this writeHead call — confirm they
// still reach the client.
reply.raw.writeHead(200, {
"content-type": "text/event-stream; charset=utf-8",
"cache-control": "no-cache, no-transform",
connection: "keep-alive",
});
// Persist the user message before any LLM call so it is kept even if the
// completion fails.
const userMessage = messages.create({
sessionId: body.sessionId,
userId: user.id,
role: "user",
content: body.message,
metadata: { model: selectedModel.id },
});
if (shouldAutoTitle(session.title)) {
sessions.updateTitle(user.id, body.sessionId, titleFromMessage(body.message));
}
sessions.touch(user.id, body.sessionId);
// Up to 5 docs; the full results go to the client, the trimmed references to the LLM.
const docResults = await docs.search(body.message, 5);
const internalDocs = docResults.map(toDocReference);
// Soft usage signal: how often has this user run each webhook in the
// recent past. Surfaced as `usageHint` so the LLM can prefer frequently
// used webhooks when ambiguous, and so the UI can show a "Most used" tag.
// A window of 0 days (or less) disables the usage lookup entirely.
const usageSinceDays = Math.max(0, envNumber(process.env.WEBHOOK_USAGE_WINDOW_DAYS, 7));
const usageSince = usageSinceDays > 0
? new Date(Date.now() - usageSinceDays * 86_400_000).toISOString()
: new Date(0).toISOString();
const usageMap = usageSinceDays > 0
? audit.usageForUserSince(usageSince, user.id)
: {};
// Returns a human-readable usage summary, or null when the webhook has no recorded runs.
const formatUsageHint = (webhookId: string): string | null => {
const stats = usageMap[webhookId];
if (!stats || stats.runs === 0) return null;
const successPct = Math.round(stats.successRate * 100);
return `${stats.runs} run${stats.runs === 1 ? "" : "s"} in last ${usageSinceDays}d, ${successPct}% success`;
};
// Only webhooks the user's roles allow are offered to the model.
const availableActions: RecommendedAction[] = loadWebhookDefinitions()
.filter((webhook) => canUseWebhook(user.roles, webhook))
.map((webhook) => {
const usageHint = formatUsageHint(webhook.id);
return {
type: "webhook" as const,
id: webhook.id,
confidence: 0,
reason: webhook.description ?? webhook.label,
requires_confirmation: webhook.confirmation_required,
...(usageHint ? { usageHint } : {}),
};
});
sendEvent(reply, { type: "docs", docs: docResults });
try {
// Prior turns only: the message persisted above is excluded (it is passed
// separately as `message`) and just the last 12 entries are kept.
const history = messages
.listForSession(user.id, body.sessionId)
.filter((message) => message.id !== userMessage.id)
.slice(-12)
.map((message) => ({ role: message.role, content: message.content }));
const t0 = Date.now();
// Walk the fallback chain. The first adapter that returns ok=true
// wins. If a structured error comes back from any one model we move
// to the next; an exception (network/5xx/timeout) also jumps chain.
let chat: Awaited<ReturnType<typeof adapters[number]["pi"]["chat"]>> | null = null;
let usedModelId = selectedModel.id;
let fallbackAttempts = 0;
const failures: Array<{ model: string; reason: string; kind?: string }> = [];
for (const entry of adapters) {
try {
const result = await entry.pi.chat({
message: body.message,
model: entry.model.model,
docs: internalDocs,
availableActions,
history,
skillPrompts: getEnabledSkillPrompts(),
systemPrompt: session.system_prompt,
});
if (result.ok) {
chat = result;
usedModelId = entry.model.id;
break;
}
// Structured error (no_content / json_parse / schema). Try next.
failures.push({
model: entry.model.id,
kind: result.error.kind,
reason: result.error.kind === "no_content"
? result.error.message
: result.error.reason,
});
fallbackAttempts += 1;
chat = result; // keep last error for the controlled fallback path
usedModelId = entry.model.id;
} catch (error) {
// Transport / timeout / 5xx — also fall through.
// A thrown error leaves `chat` and `usedModelId` untouched, so an earlier
// structured error (if any) remains the one used after the loop.
failures.push({
model: entry.model.id,
reason: error instanceof Error ? error.message : String(error),
});
fallbackAttempts += 1;
app.log.warn(
{ model: entry.model.id, err: error },
"llm call failed, trying next model in fallback chain",
);
}
}
// Covers the whole chain walk, including failed attempts.
const durationMs = Date.now() - t0;
// `chat` is still null only when every adapter threw or the chain was empty;
// the outer catch turns this into the controlled error message.
if (!chat) {
throw new Error("all fallback models failed");
}
// The adapter may return ok=true (well-formed JSON) or ok=false with
// a structured error + safe fallback. In both cases the fallback
// contains a usable `answer` and (possibly empty) actions; we never
// throw on a parse/schema problem — those are operational signal, not
// request failures.
const result = chat.ok ? chat.result : chat.fallback;
if (!chat.ok) {
app.log.warn(
{
kind: chat.error.kind,
reason: chat.error.kind === "no_content" ? chat.error.message : chat.error.reason,
model: usedModelId,
},
"pi-adapter returned a parse/structured error; using safe fallback",
);
}
// Deterministic safety net: if the LLM forgot to put a relevant webhook
// in `recommended_actions` (common with short user prompts), scan both
// the user's input and the model's answer for any role-allowed webhook
// id (or its first word) and synthesize an action so the user can still
// execute it from the right panel.
const recommendedActions = enrichActionsWithMentions(
body.message,
result.answer,
result.recommended_actions,
availableActions,
);
// The answer is already complete here; it is re-emitted token by token.
sendAnswerTokens(reply, result.answer);
sendEvent(reply, { type: "actions", actions: recommendedActions });
// Metadata stores the model's own docs/actions (`result.*`), not the
// mention-enriched action list that was sent to the client.
const assistantMetadata: Record<string, unknown> = {
model: usedModelId,
docs: result.internal_docs,
actions: result.recommended_actions,
usage: { ...(chat.usage ?? {}), durationMs },
};
// Fallback details are recorded only when the chain was actually exercised.
if (usedModelId !== selectedModel.id || fallbackAttempts > 0) {
assistantMetadata.requested_model = selectedModel.id;
assistantMetadata.fallback_attempts = fallbackAttempts;
assistantMetadata.fallback_chain = chain;
assistantMetadata.fallback_failures = failures;
}
if (!chat.ok) {
assistantMetadata.error_kind = chat.error.kind;
assistantMetadata.error_reason = chat.error.kind === "no_content"
? chat.error.message
: chat.error.reason;
assistantMetadata.fallback = true;
}
messages.create({
sessionId: body.sessionId,
userId: user.id,
role: "assistant",
content: result.answer,
metadata: assistantMetadata,
});
sessions.touch(user.id, body.sessionId);
sendEvent(reply, { type: "done" });
reply.raw.end();
} catch (error) {
// Controlled failure path: persist a fixed assistant message carrying the
// error text in its metadata, then close the stream with token, error and
// done events.
const message = "I could not complete the model response. The user message has been saved.";
app.log.error(error);
messages.create({
sessionId: body.sessionId,
userId: user.id,
role: "assistant",
content: message,
metadata: {
model: selectedModel.id,
error: error instanceof Error ? error.message : "unknown_error",
},
});
sessions.touch(user.id, body.sessionId);
sendEvent(reply, { type: "token", token: message });
sendEvent(reply, { type: "error", error: "llm_error" });
sendEvent(reply, { type: "done" });
reply.raw.end();
}
// Both paths above have already written and ended the raw response.
return reply;
});
};

View File

@@ -0,0 +1,27 @@
import Database from "better-sqlite3";
import { mkdirSync } from "node:fs";
import { dirname, resolve } from "node:path";
/** Handle type for an open better-sqlite3 connection, as returned by `openDatabase`. */
export type AppDatabase = Database.Database;
/**
 * Turn a `sqlite://` style URL into a plain filesystem path.
 *
 * The leading `sqlite://` is removed, so `sqlite:///var/app.db` yields the
 * absolute path `/var/app.db` and `sqlite://./data/app.db` yields the
 * relative path `./data/app.db`. Values without that prefix are returned
 * unchanged.
 *
 * @param databaseUrl URL or bare path taken from configuration.
 * @returns The path portion of the URL.
 */
function sqlitePathFromUrl(databaseUrl: string): string {
  const scheme = "sqlite://";
  return databaseUrl.startsWith(scheme) ? databaseUrl.slice(scheme.length) : databaseUrl;
}
/**
 * Open the application's SQLite database, creating the parent directory
 * first when it does not exist yet.
 *
 * The connection is switched to WAL journaling and foreign-key enforcement
 * is turned on before it is handed back.
 *
 * @param databaseUrl `sqlite://` URL or plain path; defaults to
 *   `DATABASE_URL`, then to `sqlite://./data/pi-chat.db`.
 * @returns The open database handle.
 */
export function openDatabase(databaseUrl = process.env.DATABASE_URL ?? "sqlite://./data/pi-chat.db"): AppDatabase {
  const dbFile = resolve(sqlitePathFromUrl(databaseUrl));
  // The driver does not create missing directories, so do it up front.
  mkdirSync(dirname(dbFile), { recursive: true });
  const connection = new Database(dbFile);
  for (const setting of ["journal_mode = WAL", "foreign_keys = ON"]) {
    connection.pragma(setting);
  }
  return connection;
}

View File

@@ -0,0 +1,62 @@
import type { AppDatabase } from "./database.js";
/**
 * Bring the SQLite schema up to date.
 *
 * Creates the `chat_sessions`, `chat_messages` and `webhook_runs` tables and
 * their indexes when they are missing, then adds columns that were
 * introduced after the tables first existed. Every statement is guarded
 * (`IF NOT EXISTS` or a column-presence check), so calling this repeatedly
 * on the same database is safe.
 *
 * @param db Open database handle to migrate.
 */
export function migrate(db: AppDatabase): void {
// Base schema: messages and webhook runs reference their session and are
// deleted with it (ON DELETE CASCADE).
db.exec(`
CREATE TABLE IF NOT EXISTS chat_sessions (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
title TEXT,
system_prompt TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_chat_sessions_user_updated
ON chat_sessions(user_id, updated_at DESC);
CREATE TABLE IF NOT EXISTS chat_messages (
id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
user_id TEXT NOT NULL,
role TEXT NOT NULL,
content TEXT NOT NULL,
metadata TEXT,
created_at TEXT NOT NULL,
FOREIGN KEY (session_id) REFERENCES chat_sessions(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_chat_messages_session_user_created
ON chat_messages(session_id, user_id, created_at ASC);
CREATE TABLE IF NOT EXISTS webhook_runs (
id TEXT PRIMARY KEY,
webhook_id TEXT NOT NULL,
user_id TEXT NOT NULL,
session_id TEXT NOT NULL,
status TEXT NOT NULL,
request_payload TEXT,
response_status INTEGER,
attempts INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL,
FOREIGN KEY (session_id) REFERENCES chat_sessions(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_webhook_runs_session_user_created
ON webhook_runs(session_id, user_id, created_at DESC);
`);
// Idempotent additive migrations for existing DBs.
// `CREATE TABLE IF NOT EXISTS` leaves an already existing table untouched,
// so columns added later have to be checked for and added explicitly.
const webhookRunColumns = db
.prepare("PRAGMA table_info(webhook_runs)")
.all() as Array<{ name: string }>;
if (!webhookRunColumns.some((column) => column.name === "attempts")) {
db.exec("ALTER TABLE webhook_runs ADD COLUMN attempts INTEGER NOT NULL DEFAULT 1");
}
// Same check for the per-session system prompt column.
const sessionColumns = db
.prepare("PRAGMA table_info(chat_sessions)")
.all() as Array<{ name: string }>;
if (!sessionColumns.some((column) => column.name === "system_prompt")) {
db.exec("ALTER TABLE chat_sessions ADD COLUMN system_prompt TEXT");
}
}

View File

@@ -0,0 +1,255 @@
import { readdirSync, readFileSync, statSync } from "node:fs";
import { relative, resolve } from "node:path";
import YAML from "yaml";
import { envString } from "../env.js";
import { loadRagConfig } from "../rag/config.js";
import { getViaRag, isRagRemote, searchViaRag } from "../rag/client.js";
/** A knowledge-base document including its full body text. */
export type KnowledgeDoc = {
// Identifier used by the API; for files loaded from disk it is the relative
// path with `.md` removed and path separators replaced by `:`.
id: string;
// For files loaded from disk: the frontmatter `title`, else the first
// heading, else the relative path.
title: string;
// For files loaded from disk: the path relative to the knowledge directory.
source: string;
tags: string[];
owner?: string;
updated?: string;
// Heading texts found in the body, without the leading `#` markers.
headings: string[];
// Body text; for files loaded from disk the frontmatter block is removed.
content: string;
};
/** A document as returned by search and list calls: metadata only, no body. */
export type KnowledgeSearchResult = Omit<KnowledgeDoc, "content"> & {
// Match score for the query; metadata listings built locally use 0.
relevance: number;
// Short text snippet; metadata listings built locally use "".
excerpt: string;
};
/** Default knowledge directory: `knowledge/` two levels above the current working directory. */
const defaultKnowledgeDir = () => {
  const workingDir = process.cwd();
  return resolve(workingDir, "..", "..", "knowledge");
};
/**
 * Derive a document id from a relative file path: a trailing `.md`
 * (any letter case) is dropped and every `/` or `\` becomes `:`.
 */
const normalizePathId = (source: string) => {
  const withoutExtension = source.replace(/\.md$/i, "");
  return withoutExtension.replace(/[\\/]/g, ":");
};
/**
 * Recursively collect the absolute paths of all files ending in `.md`
 * below `dir`. Sub-directories are descended into in the order the
 * directory listing returns them; all other entries are ignored.
 */
const walkMarkdownFiles = (dir: string): string[] => {
  const found: string[] = [];
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const entryPath = resolve(dir, entry.name);
    if (entry.isDirectory()) {
      found.push(...walkMarkdownFiles(entryPath));
    } else if (entry.isFile() && entry.name.endsWith(".md")) {
      found.push(entryPath);
    }
  }
  return found;
};
/**
 * Split a markdown file into its YAML frontmatter and body.
 *
 * Frontmatter is recognised only when the text starts with `---` and a
 * later line starts with `---` again. Otherwise the metadata is empty and
 * the body is the raw text, unchanged. When frontmatter is present the body
 * is trimmed.
 */
const parseFrontmatter = (raw: string) => {
  const closingFence = raw.startsWith("---") ? raw.indexOf("\n---", 3) : -1;
  if (closingFence === -1) {
    return { metadata: {}, body: raw };
  }
  const yamlText = raw.slice(3, closingFence).trim();
  // Skip past the four characters of "\n---" to reach the body.
  const body = raw.slice(closingFence + 4).trim();
  // An empty frontmatter block parses to null; normalise it to {}.
  const metadata = YAML.parse(yamlText) ?? {};
  return { metadata, body };
};
/**
 * Return the text of every line that starts with `#`, with the leading
 * `#` run and surrounding whitespace removed. Lines that are empty after
 * stripping are dropped.
 */
const extractHeadings = (body: string) => {
  const headings: string[] = [];
  for (const line of body.split("\n")) {
    if (!line.startsWith("#")) continue;
    const text = line.replace(/^#+\s*/, "").trim();
    if (text) headings.push(text);
  }
  return headings;
};
/**
 * Lower-case `value` and split it into runs of ASCII letters and digits,
 * keeping only tokens of at least two characters.
 */
const tokenize = (value: string) => {
  const tokens: string[] = [];
  for (const piece of value.toLowerCase().split(/[^a-z0-9]+/i)) {
    const token = piece.trim();
    if (token.length >= 2) tokens.push(token);
  }
  return tokens;
};
/**
 * Score a document against a query by substring matches per query token.
 *
 * Each token adds a weight for every field that contains it: title 5,
 * tags 4, headings 3, source path 2, body 1. A query that yields no tokens
 * scores a flat 0.1 for every document.
 */
const scoreDoc = (doc: KnowledgeDoc, query: string) => {
  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return 0.1;
  }
  const weightedFields: Array<[string, number]> = [
    [doc.title.toLowerCase(), 5],
    [doc.tags.join(" ").toLowerCase(), 4],
    [doc.headings.join(" ").toLowerCase(), 3],
    [doc.source.toLowerCase(), 2],
    [doc.content.toLowerCase(), 1],
  ];
  let total = 0;
  for (const token of queryTokens) {
    for (const [text, weight] of weightedFields) {
      if (text.includes(token)) total += weight;
    }
  }
  return total;
};
/**
 * Build a short snippet of `content` for display.
 *
 * Whitespace runs are collapsed to single spaces. If the first query token
 * occurs in the text, the snippet starts up to 80 characters before the hit
 * and ends 140 characters after its start; otherwise the first 220
 * characters are returned.
 */
const excerptFor = (content: string, query: string) => {
  const flattened = content.replace(/\s+/g, " ").trim();
  const [firstToken] = tokenize(query);
  const hit = firstToken ? flattened.toLowerCase().indexOf(firstToken) : -1;
  return hit === -1
    ? flattened.slice(0, 220)
    : flattened.slice(Math.max(0, hit - 80), hit + 140);
};
/**
 * Tag filter driven by the RAG config.
 *
 * With a non-empty `includeTags` the document must carry at least one of
 * them; with a non-empty `excludeTags` it must carry none of them. Empty
 * lists impose no restriction.
 */
const passesTagFilter = (
  doc: KnowledgeSearchResult,
  includeTags: string[],
  excludeTags: string[],
): boolean => {
  const carriesAnyOf = (wanted: string[]) => doc.tags.some((tag) => wanted.includes(tag));
  if (includeTags.length > 0 && !carriesAnyOf(includeTags)) return false;
  return !(excludeTags.length > 0 && carriesAnyOf(excludeTags));
};
/**
 * Read every markdown file under the knowledge directory into a
 * `KnowledgeDoc`.
 *
 * Metadata comes from the YAML frontmatter where present. The title falls
 * back to the first heading and then to the relative path. Frontmatter
 * fields of an unexpected type are ignored.
 *
 * @param knowledgeDir Root directory; defaults to `KNOWLEDGE_DIR`, then to
 *   the repository's `knowledge/` folder.
 * @returns All documents found, or an empty list when the directory cannot
 *   be stat'ed.
 */
export const loadKnowledgeDocs = (knowledgeDir = envString(process.env.KNOWLEDGE_DIR, defaultKnowledgeDir())): KnowledgeDoc[] => {
  const root = resolve(knowledgeDir);
  try {
    statSync(root);
  } catch {
    // A missing or unreadable knowledge directory means "no docs", not an error.
    return [];
  }
  const optionalString = (value: unknown) => (typeof value === "string" ? value : undefined);
  const docs: KnowledgeDoc[] = [];
  for (const filePath of walkMarkdownFiles(root)) {
    const source = relative(root, filePath);
    const parsed = parseFrontmatter(readFileSync(filePath, "utf8"));
    const frontmatter = parsed.metadata as Record<string, unknown>;
    const headings = extractHeadings(parsed.body);
    docs.push({
      id: normalizePathId(source),
      title: optionalString(frontmatter.title) ?? headings[0] ?? source,
      source,
      tags: Array.isArray(frontmatter.tags) ? frontmatter.tags.map(String) : [],
      owner: optionalString(frontmatter.owner),
      updated: optionalString(frontmatter.updated),
      headings,
      content: parsed.body,
    });
  }
  return docs;
};
/**
 * Build the knowledge-base repository used by the docs routes.
 *
 * The RAG config is read once, when the repository is created. `search`
 * and `get` try the remote RAG service first when an endpoint is
 * configured; if that call fails and `fallbackToLocal` is enabled they
 * fall back to the markdown files on disk, otherwise the error propagates.
 */
export const createDocsRepository = () => {
  const rag = loadRagConfig();
  const useRemote = isRagRemote(rag);
  const allowedByTags = (doc: KnowledgeSearchResult): boolean =>
    passesTagFilter(doc, rag.includeTags, rag.excludeTags);

  return {
    /** Search documents; `limit` defaults to the configured `topK` and is at least 1. */
    async search(query: string, limit?: number): Promise<KnowledgeSearchResult[]> {
      const effectiveLimit = Math.max(1, limit ?? rag.topK);
      if (useRemote) {
        try {
          return await searchViaRag(rag, query, effectiveLimit);
        } catch (error) {
          if (!rag.fallbackToLocal) throw error;
        }
      }
      // Local fallback: read from knowledge/ and apply token-overlap scoring.
      const matches: KnowledgeSearchResult[] = [];
      for (const doc of loadKnowledgeDocs()) {
        const { content, ...meta } = doc;
        if (!allowedByTags({ ...meta, relevance: 0, excerpt: "" })) continue;
        const relevance = scoreDoc(doc, query);
        if (relevance >= rag.minRelevance && relevance > 0) {
          matches.push({ ...meta, relevance, excerpt: excerptFor(content, query) });
        }
      }
      // Best score first; ties are ordered by title.
      matches.sort((a, b) => b.relevance - a.relevance || a.title.localeCompare(b.title));
      return matches.slice(0, effectiveLimit);
    },
    /** Fetch one document including its body, or `undefined` when the id is unknown. */
    async get(id: string): Promise<KnowledgeDoc | undefined> {
      if (useRemote) {
        try {
          return await getViaRag(rag, id);
        } catch (error) {
          if (!rag.fallbackToLocal) throw error;
        }
      }
      for (const doc of loadKnowledgeDocs()) {
        if (doc.id === id) return doc;
      }
      return undefined;
    },
    /** List document metadata that passes the tag filter, capped at `limit`. */
    async list(limit = 500): Promise<KnowledgeSearchResult[]> {
      const everything = await getAllMetadataLocal();
      return everything.filter(allowedByTags).slice(0, limit);
    },
    /** Count the documents that pass the tag filter. */
    async count(): Promise<number> {
      const everything = await getAllMetadataLocal();
      return everything.filter(allowedByTags).length;
    },
  };
};
/**
 * Return metadata for all documents, without tag filtering.
 *
 * Despite the name, the remote RAG service is asked first when an endpoint
 * is configured (empty query, limit 1000). The files on disk are used when
 * no endpoint is set, or when the remote call fails and `fallbackToLocal`
 * is enabled. Locally built entries carry relevance 0 and an empty excerpt.
 */
const getAllMetadataLocal = async (): Promise<KnowledgeSearchResult[]> => {
  const rag = loadRagConfig();
  if (isRagRemote(rag)) {
    try {
      return await searchViaRag(rag, "", 1000);
    } catch (error) {
      if (!rag.fallbackToLocal) throw error;
    }
  }
  const listing: KnowledgeSearchResult[] = [];
  for (const { id, title, source, tags, owner, updated, headings } of loadKnowledgeDocs()) {
    listing.push({ id, title, source, tags, owner, updated, headings, relevance: 0, excerpt: "" });
  }
  return listing;
};

View File

@@ -0,0 +1,41 @@
import type { FastifyInstance } from "fastify";
import { z } from "zod";
import { createDocsRepository } from "./repository.js";
// Query string of GET /api/docs/search: `q` is trimmed and defaults to "";
// `limit` is coerced to an integer between 1 and 20 and defaults to 5.
const searchQuery = z.object({
q: z.string().trim().default(""),
limit: z.coerce.number().int().min(1).max(20).default(5),
});
// Query string of GET /api/docs: `limit` is coerced to an integer between
// 1 and 1000 and defaults to 500.
const listQuery = z.object({
limit: z.coerce.number().int().min(1).max(1000).default(500),
});
/**
 * Register the knowledge-base HTTP routes.
 *
 * - `GET /api/docs` lists document metadata plus the total count.
 * - `GET /api/docs/search` searches by `q`.
 * - `GET /api/docs/:id` returns one document, or 404 `doc_not_found`.
 *
 * One repository instance is created at registration time and shared by
 * all three handlers.
 */
export const registerDocsRoutes = async (app: FastifyInstance) => {
  const docs = createDocsRepository();

  app.get("/api/docs", async (request) => {
    const query = listQuery.parse(request.query);
    // Await explicitly so Fastify serializes a real array, not an
    // unresolved Promise which would be `{}` in the response body.
    const [items, total] = await Promise.all([docs.list(query.limit), docs.count()]);
    return { items, total };
  });

  app.get("/api/docs/search", async (request) => {
    const query = searchQuery.parse(request.query);
    const items = await docs.search(query.q, query.limit);
    return { items };
  });

  app.get("/api/docs/:id", async (request, reply) => {
    const params = z.object({ id: z.string().min(1) }).parse(request.params);
    // `docs.get` is async. Without `await`, `doc` is a Promise and therefore
    // always truthy, so the 404 branch below could never run.
    const doc = await docs.get(params.id);
    if (!doc) {
      return reply.code(404).send({ error: "doc_not_found" });
    }
    return doc;
  });
};

20
apps/api/src/env.ts Normal file
View File

@@ -0,0 +1,20 @@
/**
* Small env helpers.
*
* `process.env.X ?? fallback` does NOT fall back on empty strings — only on
* undefined/null. That bites us when dotenv loads `KEY=` (blank value) from
* `.env`. Use `envString` / `envNumber` to get safe fallbacks.
*/
/**
 * Return `value` unless it is `undefined`, `null` or the empty string, in
 * which case `fallback` is returned. The value is not trimmed, so a
 * whitespace-only value is returned as is.
 */
export const envString = (value: string | undefined | null, fallback: string): string => {
  const candidate = value ?? "";
  return candidate.length > 0 ? candidate : fallback;
};
/**
 * Parse a numeric environment value.
 *
 * Returns `fallback` when the value is missing, blank after trimming, or
 * does not convert to a finite number.
 */
export const envNumber = (value: string | undefined | null, fallback: number): number => {
  const text = (value ?? "").trim();
  if (text.length === 0) return fallback;
  const parsed = Number(text);
  return Number.isFinite(parsed) ? parsed : fallback;
};

106
apps/api/src/mcp/config.ts Normal file
View File

@@ -0,0 +1,106 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { parse } from "yaml";
import { envString } from "../env.js";
/** Parameter schema of an MCP tool; the loader only accepts schemas whose `type` is "object". */
export type McpToolParameterSchema = {
type: "object";
required?: string[];
properties?: Record<string, unknown>;
};
/** One entry of `mcp_tools` in the MCP config file, after normalisation by `loadMcpTools`. */
export type McpToolDefinition = {
id: string;
// Falls back to the id when the config gives no name.
name: string;
description: string;
// Value of the entry's `server` field, or null when the config gives none.
server: string | null;
parameters: McpToolParameterSchema;
tags: string[];
// True unless the config explicitly sets `enabled: false`.
enabled: boolean;
};
/** One entry of `mcp_servers` in the MCP config file, after normalisation by `loadMcpServers`. */
export type McpServerDefinition = {
id: string;
// Falls back to the id when the config gives no name.
name: string;
description: string;
endpoint: string;
};
/** Tool shape returned by the HTTP API; currently the same fields as `McpToolDefinition`. */
export type PublicMcpToolDefinition = Omit<McpToolDefinition, "server"> & {
server: string | null;
};
/** Server shape returned by the HTTP API; currently identical to `McpServerDefinition`. */
export type PublicMcpServerDefinition = McpServerDefinition;
// Top-level shape of the parsed MCP YAML file.
type McpFile = {
mcp_servers?: McpServerDefinition[];
mcp_tools?: McpToolDefinition[];
};
/** Path of the MCP config file: `MCP_CONFIG_PATH` when set, else `config/mcp.yml` two levels above the working directory. */
const defaultPath = (): string => {
  const bundledConfig = resolve(process.cwd(), "../../config/mcp.yml");
  return envString(process.env.MCP_CONFIG_PATH, bundledConfig);
};
/** Type guard: true for any non-null object whose `type` property is exactly "object". */
const isToolParameterSchema = (value: unknown): value is McpToolParameterSchema =>
  typeof value === "object" &&
  value !== null &&
  (value as McpToolParameterSchema).type === "object";
/**
 * Load and normalise the `mcp_tools` list from the MCP config file.
 *
 * A missing file or a file without an `mcp_tools` array yields an empty
 * list. Entries that are not objects or lack a non-empty string id are
 * skipped. String fields are trimmed, and an invalid `parameters` value is
 * replaced by an empty object schema.
 *
 * @param configPath YAML file to read; defaults to the configured MCP path.
 * @throws Any read error other than `ENOENT`.
 */
export const loadMcpTools = (
  configPath: string = defaultPath(),
): McpToolDefinition[] => {
  let text: string;
  try {
    text = readFileSync(configPath, "utf8");
  } catch (error) {
    // No config file simply means no tools are configured.
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }
  const document = parse(text) as McpFile | null;
  const entries = document && Array.isArray(document.mcp_tools) ? document.mcp_tools : [];
  const tools: McpToolDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object" || typeof entry.id !== "string") continue;
    const id = String(entry.id).trim();
    if (id.length === 0) continue;
    tools.push({
      id,
      name: String(entry.name ?? entry.id).trim(),
      description: String(entry.description ?? "").trim(),
      server: typeof entry.server === "string" ? entry.server : null,
      parameters: isToolParameterSchema(entry.parameters)
        ? entry.parameters
        : ({ type: "object", properties: {}, required: [] } satisfies McpToolParameterSchema),
      tags: Array.isArray(entry.tags) ? entry.tags.map(String) : [],
      enabled: entry.enabled !== false,
    });
  }
  return tools;
};
/**
 * Load and normalise the `mcp_servers` list from the MCP config file.
 *
 * A missing file or a file without an `mcp_servers` array yields an empty
 * list. Entries that are not objects or lack a non-empty string id are
 * skipped. All string fields are trimmed.
 *
 * @param configPath YAML file to read; defaults to the configured MCP path.
 * @throws Any read error other than `ENOENT`.
 */
export const loadMcpServers = (
  configPath: string = defaultPath(),
): McpServerDefinition[] => {
  let text: string;
  try {
    text = readFileSync(configPath, "utf8");
  } catch (error) {
    // No config file simply means no servers are configured.
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }
  const document = parse(text) as McpFile | null;
  const entries = document && Array.isArray(document.mcp_servers) ? document.mcp_servers : [];
  const servers: McpServerDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object" || typeof entry.id !== "string") continue;
    const id = String(entry.id).trim();
    if (id.length === 0) continue;
    servers.push({
      id,
      name: String(entry.name ?? entry.id).trim(),
      description: String(entry.description ?? "").trim(),
      endpoint: String(entry.endpoint ?? "").trim(),
    });
  }
  return servers;
};
/** Return only the tools whose `enabled` flag is set; loads the tools from config when none are passed. */
export const enabledMcpTools = (tools: McpToolDefinition[] = loadMcpTools()): McpToolDefinition[] => {
  const active: McpToolDefinition[] = [];
  for (const tool of tools) {
    if (tool.enabled) active.push(tool);
  }
  return active;
};
/** Produce the API representation of a tool: a shallow copy with the same fields. */
export const toPublicMcpTool = (tool: McpToolDefinition): PublicMcpToolDefinition => {
  const publicView: PublicMcpToolDefinition = { ...tool };
  return publicView;
};
/** Produce the API representation of a server: a shallow copy with the same fields. */
export const toPublicMcpServer = (server: McpServerDefinition): PublicMcpServerDefinition => {
  const publicView: PublicMcpServerDefinition = { ...server };
  return publicView;
};

View File

@@ -0,0 +1,19 @@
import type { FastifyInstance } from "fastify";
import {
enabledMcpTools,
loadMcpServers,
toPublicMcpServer,
toPublicMcpTool,
} from "./config.js";
/**
 * Register the MCP catalog routes.
 *
 * - `GET /api/mcp/tools` returns the enabled tools.
 * - `GET /api/mcp/servers` returns all configured servers.
 *
 * The config file is re-read on every request.
 */
export const registerMcpRoutes = async (app: FastifyInstance) => {
  app.get("/api/mcp/tools", async () => ({
    items: enabledMcpTools().map(toPublicMcpTool),
  }));
  app.get("/api/mcp/servers", async () => ({
    items: loadMcpServers().map(toPublicMcpServer),
  }));
};

178
apps/api/src/metrics.ts Normal file
View File

@@ -0,0 +1,178 @@
// Tiny in-process metrics. Thread-safe enough for a 5-user MVP — counters
// and sums are only ever incremented under a single-threaded Node event
// loop, no atomic ops required.
/** One observed HTTP request, as passed to `observeHttp`. */
export type RouteMetric = {
route: string;
method: string;
// HTTP status code of the response.
status: number;
durationMs: number;
// NOTE(review): presumably epoch milliseconds; nothing in this module reads it, confirm against the caller.
timestamp: number;
};
// Running totals kept per "METHOD route" key.
type Aggregate = {
count: number;
// Request count per status class, keyed by the class floor (200, 400, 500, ...).
statusBuckets: Map<number, number>;
sumMs: number;
maxMs: number;
p95Slots: number[]; // simple streaming reservoir for a coarse p95
};
// Maximum number of duration samples kept per aggregate in `p95Slots`.
const RESERVOIR_SIZE = 200;
/** Create an empty aggregate with its own fresh status map and sample array. */
const createAggregate = (): Aggregate => {
  const empty: Aggregate = {
    count: 0,
    statusBuckets: new Map(),
    sumMs: 0,
    maxMs: 0,
    p95Slots: [],
  };
  return empty;
};
// Module-level metrics store shared by every function in this file. It is
// created once when the module is first loaded.
const metricsState = {
// Module load time in epoch milliseconds; uptime is measured from here.
startedAt: Date.now(),
// Per-route aggregates keyed by "METHOD route".
aggregates: new Map<string, Aggregate>(),
// Last N events for the /metrics JSON inspector. Bounded to avoid leaks.
recent: [] as RouteMetric[],
recentLimit: 50,
// Number of responses with status >= 500, keyed by "METHOD route".
errorCounts: new Map<string, number>(),
};
/** Build the aggregate key "METHOD route", with the method upper-cased. */
const keyFor = (route: string, method: string) => [method.toUpperCase(), route].join(" ");
/**
 * Record one finished HTTP request in the in-process metrics.
 *
 * Updates the per-route aggregate (count, duration sum and max, status
 * class counters, p95 sample window), the 5xx error counter and the
 * bounded list of recent events.
 *
 * The p95 sample window holds the most recent `RESERVOIR_SIZE` durations
 * of the route. The previous logic, once the window was full, only ever
 * replaced its largest sample with a smaller one. The window therefore
 * converged on the fastest requests ever seen and p95 was reported too low.
 *
 * @param metric The observed request.
 */
export const observeHttp = (metric: RouteMetric) => {
  const key = keyFor(metric.route, metric.method);
  let agg = metricsState.aggregates.get(key);
  if (!agg) {
    agg = createAggregate();
    metricsState.aggregates.set(key, agg);
  }

  agg.count += 1;
  agg.sumMs += metric.durationMs;
  if (metric.durationMs > agg.maxMs) agg.maxMs = metric.durationMs;

  // Status classes are keyed by their floor: 200, 300, 400, 500, ...
  const statusBucket = Math.floor(metric.status / 100) * 100;
  agg.statusBuckets.set(statusBucket, (agg.statusBuckets.get(statusBucket) ?? 0) + 1);

  if (agg.p95Slots.length < RESERVOIR_SIZE) {
    agg.p95Slots.push(metric.durationMs);
  } else {
    // Ring buffer: `count` was already incremented, so sample number
    // RESERVOIR_SIZE + 1 lands on slot 0 and overwrites the oldest sample.
    agg.p95Slots[(agg.count - 1) % RESERVOIR_SIZE] = metric.durationMs;
  }

  if (metric.status >= 500) {
    metricsState.errorCounts.set(key, (metricsState.errorCounts.get(key) ?? 0) + 1);
  }

  // Keep only the newest `recentLimit` events.
  metricsState.recent.push(metric);
  if (metricsState.recent.length > metricsState.recentLimit) {
    metricsState.recent.splice(0, metricsState.recent.length - metricsState.recentLimit);
  }
};
/**
 * Pick the `p`-th percentile from an ascending array.
 *
 * The index is `floor(p / 100 * length)`, clamped to the last element.
 * An empty array yields 0.
 */
const percentile = (sorted: number[], p: number): number => {
  const size = sorted.length;
  if (size === 0) return 0;
  const rank = Math.floor((p / 100) * size);
  return sorted[Math.min(rank, size - 1)];
};
/**
 * Build the JSON metrics snapshot.
 *
 * Contains process start time and uptime, total request and 5xx counts,
 * per-route statistics sorted by request count (highest first), and the
 * recent events with the newest first.
 */
export const snapshotMetrics = () => {
  const aggregates = [...metricsState.aggregates.entries()];

  const routes: Array<{
    route: string;
    method: string;
    count: number;
    avg_ms: number;
    p95_ms: number;
    max_ms: number;
    status_buckets: Record<string, number>;
  }> = aggregates.map(([key, agg]) => {
    // Keys look like "GET /path"; everything after the first space is the route.
    const [method, ...routeParts] = key.split(" ");
    const durations = [...agg.p95Slots].sort((a, b) => a - b);
    const statusBuckets: Record<string, number> = {};
    for (const [bucket, hits] of agg.statusBuckets) {
      statusBuckets[`${bucket}_${bucket + 99}`] = hits;
    }
    return {
      route: routeParts.join(" "),
      method,
      count: agg.count,
      avg_ms: agg.count === 0 ? 0 : Math.round(agg.sumMs / agg.count),
      p95_ms: Math.round(percentile(durations, 95)),
      max_ms: agg.maxMs,
      status_buckets: statusBuckets,
    };
  });

  let totalRequests = 0;
  for (const [, agg] of aggregates) totalRequests += agg.count;
  let total5xx = 0;
  for (const errors of metricsState.errorCounts.values()) total5xx += errors;

  return {
    started_at: new Date(metricsState.startedAt).toISOString(),
    uptime_seconds: Math.round((Date.now() - metricsState.startedAt) / 1000),
    totals: {
      requests: totalRequests,
      errors_5xx: total5xx,
    },
    routes: routes.sort((a, b) => b.count - a.count),
    recent: [...metricsState.recent].reverse(),
  };
};
/**
 * Render the metrics as Prometheus-style text exposition.
 *
 * Emits the uptime gauge, one request counter line per route, method and
 * status class, and per-route duration series (0.95 quantile, sum, count,
 * max, avg). The output ends with a newline.
 */
export const renderPrometheusText = (): string => {
  const uptimeSeconds = Math.round((Date.now() - metricsState.startedAt) / 1000);
  const out: string[] = [
    "# HELP sic_uptime_seconds Seconds since the API process started",
    "# TYPE sic_uptime_seconds gauge",
    `sic_uptime_seconds ${uptimeSeconds}`,
    "",
    "# HELP sic_http_requests_total Total HTTP requests, labelled by route, method, status",
    "# TYPE sic_http_requests_total counter",
  ];

  // Keys look like "GET /path"; split them once and reuse for both sections.
  const series = [...metricsState.aggregates.entries()].map(([key, agg]) => {
    const [method, ...routeParts] = key.split(" ");
    return { method, route: routeParts.join(" "), agg };
  });

  for (const { method, route, agg } of series) {
    for (const [bucket, count] of agg.statusBuckets) {
      const statusClass = `${bucket}_${bucket + 99}`;
      out.push(
        `sic_http_requests_total{route="${route}",method="${method}",status_class="${statusClass}"} ${count}`,
      );
    }
  }

  out.push(
    "",
    "# HELP sic_http_request_duration_ms Request duration in ms",
    "# TYPE sic_http_request_duration_ms summary",
  );

  for (const { method, route, agg } of series) {
    const durations = [...agg.p95Slots].sort((a, b) => a - b);
    const avg = agg.count === 0 ? 0 : Math.round(agg.sumMs / agg.count);
    out.push(
      `sic_http_request_duration_ms{route="${route}",method="${method}",quantile="0.95"} ${percentile(durations, 95)}`,
      `sic_http_request_duration_ms_sum{route="${route}",method="${method}"} ${agg.sumMs}`,
      `sic_http_request_duration_ms_count{route="${route}",method="${method}"} ${agg.count}`,
      `sic_http_request_duration_ms_max{route="${route}",method="${method}"} ${agg.maxMs}`,
      `sic_http_request_duration_ms_avg{route="${route}",method="${method}"} ${avg}`,
    );
  }

  return `${out.join("\n")}\n`;
};
/** Test helper: restart the uptime clock and empty all collected metrics in place. */
export const __resetMetricsForTests = () => {
  metricsState.startedAt = Date.now();
  for (const store of [metricsState.aggregates, metricsState.errorCounts]) {
    store.clear();
  }
  // Empty the existing array rather than replacing it.
  metricsState.recent.splice(0, metricsState.recent.length);
};

View File

@@ -0,0 +1,105 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { parse } from "yaml";
import { envString } from "../env.js";
/** One entry of `models` in the models config file, as returned by `loadModelDefinitions`. */
export type ModelDefinition = {
id: string;
label: string;
provider: "openai-compatible";
// `${VAR}` placeholders in this value are expanded from the environment at load time.
base_url?: string;
// Name of the environment variable holding this model's API key, if it has its own.
api_key_env?: string;
model: string;
max_tokens?: number;
// Ordered list of model ids to try if this one fails (5xx, 429, network
// error, timeout). Each id must resolve to a known model; unknown ids are
// dropped at load time. Use ["mr-auto"] for a single fallback. The
// fallback chain for the chain itself is resolved at request time by
// `resolveFallbackChain`.
fallback?: string[];
};
/** Subset of a model definition returned by the HTTP API; leaves out `base_url`, `api_key_env`, `model` and `fallback`. */
export type PublicModelDefinition = Pick<ModelDefinition, "id" | "label" | "provider" | "max_tokens">;
/**
 * Resolve the API key for a model. Order of precedence:
 *
 * 1. The per-model env var named by `model.api_key_env`, when it is set to
 *    a non-blank value (returned trimmed). Useful when providers use
 *    different keys.
 * 2. The shared `LLM_API_KEY`, then `MINIMAX_API_KEY`.
 * 3. The literal "dummy", so OpenAI-compatible servers that need no auth
 *    still work out of the box.
 */
export const resolveModelApiKey = (model: ModelDefinition): string => {
  if (model.api_key_env) {
    const perModelKey = (process.env[model.api_key_env] ?? "").trim();
    if (perModelKey.length > 0) return perModelKey;
  }
  const sharedKey = envString(process.env.MINIMAX_API_KEY, "dummy");
  return envString(process.env.LLM_API_KEY, sharedKey);
};
/**
 * Replace every `${NAME}` placeholder (upper-case letters, digits and
 * underscores) with the value of that environment variable, or with "" when
 * it is unset. A missing input yields `undefined`.
 */
const expandEnv = (value: string | undefined) => {
  if (value == null) return undefined;
  const lookup = (_placeholder: string, key: string) => process.env[key] ?? "";
  return value.replace(/\$\{([A-Z0-9_]+)\}/g, lookup);
};
/**
 * Load the model catalog from the models config file.
 *
 * The path comes from `MODELS_CONFIG_PATH`, else `config/models.yml` two
 * levels above the working directory. For every model, `base_url` gets its
 * environment placeholders expanded. `fallback` is reduced to trimmed,
 * non-empty ids that exist in the catalog and are not the model itself.
 *
 * @throws When the file cannot be read or parsed.
 */
export const loadModelDefinitions = (): ModelDefinition[] => {
  const configPath = envString(process.env.MODELS_CONFIG_PATH, resolve(process.cwd(), "../../config/models.yml"));
  const document = parse(readFileSync(configPath, "utf8")) as { models?: ModelDefinition[] } | null;
  const models = document?.models ?? [];

  const knownIds = new Set<string>();
  for (const model of models) {
    knownIds.add(String(model.id ?? "").trim());
  }

  return models.map((model) => {
    const declared = Array.isArray(model.fallback) ? model.fallback : [];
    const fallback: string[] = [];
    for (const entry of declared) {
      const id = String(entry).trim();
      if (id.length > 0 && knownIds.has(id) && id !== model.id) fallback.push(id);
    }
    return {
      ...model,
      base_url: expandEnv(model.base_url),
      fallback,
    };
  });
};
/** Id of the model to preselect: `DEFAULT_MODEL` when set to a non-empty value, else "fast". */
export const getDefaultModelId = () => {
  const configured = process.env.DEFAULT_MODEL;
  return envString(configured, "fast");
};
/** Look up one model by id in a freshly loaded catalog; `undefined` when no model has that id. */
export const findModelDefinition = (modelId: string) => {
  const catalog = loadModelDefinitions();
  return catalog.find((candidate) => candidate.id === modelId);
};
/**
 * Resolve the ordered fallback chain starting at `modelId`.
 *
 * The starting model is always first. Its fallback list follows in order,
 * then the fallback lists of those models, and so on (breadth-first). Every
 * id appears at most once, so cycles end the walk instead of looping. If
 * the model is unknown and no override is set, the chain is just
 * `[modelId]` (the caller will surface model_not_found).
 *
 * `LLM_FALLBACK_CHAIN` (comma-separated ids) replaces the fallback list of
 * the starting model only. Empty or unset means "use the YAML list".
 *
 * Earlier versions followed only the first entry of the override and of
 * each `fallback` array, silently ignoring the rest. Results are unchanged
 * when every list has at most one entry.
 *
 * @param modelId Id of the model the request asked for.
 * @returns Model ids to try, in order.
 */
export const resolveFallbackChain = (modelId: string): string[] => {
  const byId = new Map(loadModelDefinitions().map((model) => [model.id, model] as const));
  const override = envString(process.env.LLM_FALLBACK_CHAIN, "")
    .split(",")
    .map((id) => id.trim())
    .filter((id) => id.length > 0);

  const chain: string[] = [];
  const seen = new Set<string>();
  // Work queue for the breadth-first walk; it grows while being iterated.
  const pending: string[] = [modelId];
  for (let index = 0; index < pending.length; index += 1) {
    const current = pending[index];
    // Skip empty ids and anything already in the chain (dedupe and cycle guard).
    if (!current || seen.has(current)) continue;
    seen.add(current);
    chain.push(current);
    const next =
      current === modelId && override.length > 0
        ? override
        : byId.get(current)?.fallback ?? [];
    pending.push(...next);
  }
  return chain;
};
/** Reduce a model definition to the fields that are returned by the HTTP API. */
export const toPublicModel = (model: ModelDefinition): PublicModelDefinition => {
  const { id, label, provider, max_tokens } = model;
  return { id, label, provider, max_tokens };
};

View File

@@ -0,0 +1,14 @@
import type { FastifyInstance } from "fastify";
import { getDefaultModelId, loadModelDefinitions, toPublicModel } from "./config.js";
/**
 * Register `GET /api/models`.
 *
 * Responds with the public model list and the id to preselect. That id is
 * the configured default when it exists in the list, otherwise the first
 * model's id (or `undefined` for an empty list).
 */
export const registerModelRoutes = async (app: FastifyInstance) => {
  app.get("/api/models", async () => {
    const preferredId = getDefaultModelId();
    const items = loadModelDefinitions().map(toPublicModel);
    const preferredExists = items.some((model) => model.id === preferredId);
    return {
      default_model: preferredExists ? preferredId : items[0]?.id,
      items,
    };
  });
};

View File

@@ -0,0 +1,58 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { parse } from "yaml";
import { envString } from "../env.js";
/** One entry of `n8n_workflows` in the n8n config file, after normalisation by `loadN8nWorkflows`. */
export type N8nWorkflowDefinition = {
id: string;
// Falls back to the id when the config gives no label.
label: string;
description: string;
// Workflow URL with `${VAR}` placeholders already expanded from the environment.
url: string;
// Roles a user must hold (all of them) to see the workflow; empty means unrestricted.
required_roles: string[];
tags: string[];
};
/** Workflow shape returned by the HTTP API; currently identical to `N8nWorkflowDefinition`. */
export type PublicN8nWorkflowDefinition = N8nWorkflowDefinition;
// Top-level shape of the parsed n8n YAML file.
type N8nFile = { n8n_workflows?: N8nWorkflowDefinition[] };
/**
 * Replace `${NAME}` and `${NAME:?message}` placeholders with the value of
 * the environment variable `NAME`, or with "" when it is unset. The
 * `:?message` part is accepted but not enforced.
 */
const expandEnv = (value: string): string => {
  const lookup = (_placeholder: string, name: string) => process.env[name] ?? "";
  return value.replace(/\$\{([A-Z0-9_]+)(?::\?[^}]+)?\}/g, lookup);
};
/** Path of the n8n config file: `N8N_CONFIG_PATH` when set, else `config/n8n-workflows.yml` two levels above the working directory. */
const defaultPath = (): string => {
  const bundledConfig = resolve(process.cwd(), "../../config/n8n-workflows.yml");
  return envString(process.env.N8N_CONFIG_PATH, bundledConfig);
};
/**
 * Load and normalise the `n8n_workflows` list from the n8n config file.
 *
 * A missing file or a file without an `n8n_workflows` array yields an
 * empty list. Entries that are not objects or lack a non-empty string id
 * are skipped. String fields are trimmed and environment placeholders in
 * `url` are expanded.
 *
 * @param configPath YAML file to read; defaults to the configured n8n path.
 * @throws Any read error other than `ENOENT`.
 */
export const loadN8nWorkflows = (
  configPath: string = defaultPath(),
): N8nWorkflowDefinition[] => {
  let text: string;
  try {
    text = readFileSync(configPath, "utf8");
  } catch (error) {
    // No config file simply means no workflows are configured.
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
    throw error;
  }
  const document = parse(text) as N8nFile | null;
  const entries = document && Array.isArray(document.n8n_workflows) ? document.n8n_workflows : [];
  const workflows: N8nWorkflowDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object" || typeof entry.id !== "string") continue;
    const id = String(entry.id).trim();
    if (id.length === 0) continue;
    workflows.push({
      id,
      label: String(entry.label ?? entry.id).trim(),
      description: String(entry.description ?? "").trim(),
      url: expandEnv(String(entry.url ?? "").trim()),
      required_roles: Array.isArray(entry.required_roles)
        ? entry.required_roles.map(String)
        : [],
      tags: Array.isArray(entry.tags) ? entry.tags.map(String) : [],
    });
  }
  return workflows;
};
/**
 * Decide whether a user may see a workflow: the user must hold every role
 * in `required_roles`. A workflow without required roles is open to all.
 */
export const canUseN8nWorkflow = (userRoles: string[], wf: N8nWorkflowDefinition): boolean => {
  const required = Array.isArray(wf.required_roles) ? wf.required_roles : [];
  for (const role of required) {
    if (!userRoles.includes(role)) return false;
  }
  return true;
};
/** Produce the API representation of a workflow: a shallow copy with the same fields. */
export const toPublicN8nWorkflow = (wf: N8nWorkflowDefinition): PublicN8nWorkflowDefinition => {
  const publicView: PublicN8nWorkflowDefinition = { ...wf };
  return publicView;
};

View File

@@ -0,0 +1,13 @@
import type { FastifyInstance } from "fastify";
import { canUseN8nWorkflow, loadN8nWorkflows, toPublicN8nWorkflow } from "./config.js";
import { getAuthUser } from "../auth/index.js";
/**
 * Register `GET /api/n8n-workflows`, which lists the workflows the
 * authenticated user's roles allow. The config file is re-read on every
 * request.
 */
export const registerN8nRoutes = async (app: FastifyInstance) => {
  app.get("/api/n8n-workflows", async (request) => {
    const { roles } = await getAuthUser(request);
    const items: Array<ReturnType<typeof toPublicN8nWorkflow>> = [];
    for (const wf of loadN8nWorkflows()) {
      if (canUseN8nWorkflow(roles, wf)) items.push(toPublicN8nWorkflow(wf));
    }
    return { items };
  });
};

105
apps/api/src/rag/client.ts Normal file
View File

@@ -0,0 +1,105 @@
import type { KnowledgeDoc, KnowledgeSearchResult } from "../docs/repository.js";
import type { RagConfig } from "./config.js";
/**
 * JSON body expected from the RAG service's search endpoint. Only `id` is
 * required per item; `searchViaRag` fills in defaults for the other fields.
 */
export type RagSearchResponse = {
items: Array<{
id: string;
title?: string;
source?: string;
tags?: string[];
relevance?: number;
excerpt?: string;
// Not copied into search results by `searchViaRag`.
content?: string;
}>;
};
/** JSON body expected from the RAG service's document endpoint; only `id` is required. */
export type RagGetResponse = Partial<KnowledgeDoc> & { id: string };
/** Remove a single trailing `/` from `url`, if there is one, so path segments can be appended. */
const ensureTrailing = (url: string) => (url.endsWith("/") ? url.slice(0, -1) : url);
/** True when a remote RAG endpoint is configured, i.e. `endpoint` is not blank. */
export const isRagRemote = (config: RagConfig): boolean => {
  const endpoint = config.endpoint.trim();
  return endpoint !== "";
};
/** Build the JSON request headers, adding a bearer `authorization` header when an auth token is configured. */
const buildHeaders = (config: RagConfig): Record<string, string> => ({
  "content-type": "application/json",
  accept: "application/json",
  ...(config.authToken ? { authorization: `Bearer ${config.authToken}` } : {}),
});
/**
 * Search the remote RAG service.
 *
 * POSTs the query, the limit and the configured relevance and tag filters
 * to `<endpoint>/search`. The request is aborted after `config.timeoutMs`.
 * Missing fields in the response items are filled with defaults, and
 * `headings` is always empty for remote results.
 *
 * @returns The mapped results, or an empty list when the response has no
 *   `items` array.
 * @throws `rag_search_failed:<status>` on a non-2xx response; fetch and
 *   abort errors propagate unchanged.
 */
export const searchViaRag = async (
  config: RagConfig,
  query: string,
  limit: number,
): Promise<KnowledgeSearchResult[]> => {
  const url = `${ensureTrailing(config.endpoint)}/search`;
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), config.timeoutMs);
  try {
    const payload = {
      query,
      limit,
      min_relevance: config.minRelevance,
      include_tags: config.includeTags,
      exclude_tags: config.excludeTags,
    };
    const response = await fetch(url, {
      method: "POST",
      headers: buildHeaders(config),
      body: JSON.stringify(payload),
      signal: abort.signal,
    });
    if (!response.ok) {
      throw new Error(`rag_search_failed:${response.status}`);
    }
    const data = (await response.json()) as RagSearchResponse;
    if (!data || !Array.isArray(data.items)) return [];
    const results: KnowledgeSearchResult[] = [];
    for (const item of data.items) {
      results.push({
        id: String(item.id),
        title: String(item.title ?? item.id),
        source: String(item.source ?? ""),
        tags: Array.isArray(item.tags) ? item.tags.map(String) : [],
        relevance: Number(item.relevance ?? 0),
        excerpt: String(item.excerpt ?? ""),
        headings: [],
      });
    }
    return results;
  } finally {
    // Always cancel the abort timer, on success and on failure.
    clearTimeout(timer);
  }
};
/**
 * Fetch one document from the remote RAG service.
 *
 * GETs `<endpoint>/docs/<url-encoded id>`; the request is aborted after
 * `config.timeoutMs`. Missing fields in the response are filled with
 * defaults.
 *
 * @returns The document, or `undefined` on a 404 or when the body has no `id`.
 * @throws `rag_get_failed:<status>` on any other non-2xx response; fetch
 *   and abort errors propagate unchanged.
 */
export const getViaRag = async (
  config: RagConfig,
  id: string,
): Promise<KnowledgeDoc | undefined> => {
  const url = `${ensureTrailing(config.endpoint)}/docs/${encodeURIComponent(id)}`;
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), config.timeoutMs);
  const optionalString = (value: unknown) => (typeof value === "string" ? value : undefined);
  const stringList = (value: unknown): string[] => (Array.isArray(value) ? value.map(String) : []);
  try {
    const response = await fetch(url, {
      method: "GET",
      headers: buildHeaders(config),
      signal: abort.signal,
    });
    // An unknown id is an expected outcome, not an error.
    if (response.status === 404) return undefined;
    if (!response.ok) {
      throw new Error(`rag_get_failed:${response.status}`);
    }
    const data = (await response.json()) as RagGetResponse;
    if (!data || !data.id) return undefined;
    return {
      id: String(data.id),
      title: String(data.title ?? data.id),
      source: String(data.source ?? ""),
      tags: stringList(data.tags),
      owner: optionalString(data.owner),
      updated: optionalString(data.updated),
      headings: stringList(data.headings),
      content: String(data.content ?? ""),
    };
  } finally {
    // Always cancel the abort timer, on success and on failure.
    clearTimeout(timer);
  }
};

View File

@@ -0,0 +1,80 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { parse } from "yaml";
import { envString } from "../env.js";
/** Supported chunking strategies; anything else in the config is treated as "heading". */
export type RagChunkStrategy = "heading" | "paragraph" | "fixed";
/** Normalised `rag` section of the RAG config file. */
export type RagConfig = {
// Base URL of the remote RAG service; blank means "local knowledge files only".
endpoint: string;
// Sent as a bearer token to the remote service when non-empty.
authToken: string;
// Abort timeout for remote requests, in milliseconds.
timeoutMs: number;
// When true, a failed remote call falls back to the local knowledge files.
fallbackToLocal: boolean;
chunkStrategy: RagChunkStrategy;
chunkSizeChars: number;
// Default number of search results when the caller gives no limit.
topK: number;
// Minimum relevance a search result needs to be returned.
minRelevance: number;
// When non-empty, only docs carrying at least one of these tags are returned.
includeTags: string[];
// Docs carrying any of these tags are never returned.
excludeTags: string[];
};
/** RAG config as returned by the HTTP API. */
export type PublicRagConfig = Omit<RagConfig, "authToken"> & {
// Never expose the auth token over the public API; show only whether
// one is configured.
hasAuthToken: boolean;
};
/**
 * Built-in RAG settings, used when the config file or its `rag` section is
 * missing: no remote endpoint, 10 s timeout, local fallback enabled,
 * heading chunks of 1500 characters, five results, no relevance or tag
 * filtering. Each call returns a fresh object.
 */
const defaultConfig = (): RagConfig => {
  const defaults: RagConfig = {
    endpoint: "",
    authToken: "",
    timeoutMs: 10_000,
    fallbackToLocal: true,
    chunkStrategy: "heading",
    chunkSizeChars: 1500,
    topK: 5,
    minRelevance: 0,
    includeTags: [],
    excludeTags: [],
  };
  return defaults;
};
/**
 * Replace `${NAME}` and `${NAME:}` placeholders with the value of the
 * environment variable `NAME`, or with "" when it is unset.
 */
const expandEnv = (value: string): string => {
  const lookup = (_placeholder: string, name: string) => process.env[name] ?? "";
  return value.replace(/\$\{([A-Z0-9_]+):?\}/g, lookup);
};
/**
 * Turn the raw `rag` section of the YAML file into a complete `RagConfig`.
 *
 * A missing or non-object section yields the defaults. Strings are trimmed
 * and have environment placeholders expanded. Numbers are clamped to their
 * minimum (`timeoutMs` >= 100, `chunkSizeChars` >= 200, `topK` >= 1,
 * `minRelevance` within 0..1). An unknown chunk strategy becomes "heading".
 *
 * Numeric fields that do not convert to a finite number (for example
 * `timeoutMs: soon`) fall back to their default. Previously they became
 * `NaN`, which passes straight through `Math.max`/`Math.min`. That broke
 * the abort timer, made local searches return nothing, and turned the
 * relevance filter into "reject everything".
 */
const normalize = (raw: unknown): RagConfig => {
  const defaults = defaultConfig();
  if (!raw || typeof raw !== "object") return defaults;
  const r = raw as Partial<RagConfig>;

  // Coerce like `Number(value ?? fallback)`, but never let NaN or Infinity through.
  const finiteOr = (value: unknown, fallback: number): number => {
    const parsed = Number(value ?? fallback);
    return Number.isFinite(parsed) ? parsed : fallback;
  };

  const chunkStrategy: RagChunkStrategy =
    r.chunkStrategy === "paragraph" || r.chunkStrategy === "fixed" ? r.chunkStrategy : "heading";

  return {
    endpoint: expandEnv(String(r.endpoint ?? "").trim()),
    authToken: expandEnv(String(r.authToken ?? "").trim()),
    timeoutMs: Math.max(100, finiteOr(r.timeoutMs, defaults.timeoutMs)),
    fallbackToLocal: r.fallbackToLocal !== false,
    chunkStrategy,
    chunkSizeChars: Math.max(200, finiteOr(r.chunkSizeChars, defaults.chunkSizeChars)),
    topK: Math.max(1, finiteOr(r.topK, defaults.topK)),
    minRelevance: Math.max(0, Math.min(1, finiteOr(r.minRelevance, defaults.minRelevance))),
    includeTags: Array.isArray(r.includeTags) ? r.includeTags.map(String) : [],
    excludeTags: Array.isArray(r.excludeTags) ? r.excludeTags.map(String) : [],
  };
};
/**
 * Location of the RAG config file: RAG_CONFIG_PATH when `envString` accepts
 * it, otherwise `config/rag.yml` two levels above the current working directory.
 */
const defaultPath = (): string => {
  const bundledPath = resolve(process.cwd(), "../../config/rag.yml");
  return envString(process.env.RAG_CONFIG_PATH, bundledPath);
};
/**
 * Reads and normalizes the RAG configuration from disk.
 *
 * @param configPath YAML file to read; defaults to `defaultPath()`.
 * @returns The normalized `rag:` section, or the built-in defaults when the
 *   file does not exist.
 * @throws Any other read or parse error is rethrown unchanged.
 */
export const loadRagConfig = (configPath: string = defaultPath()): RagConfig => {
  try {
    const document = parse(readFileSync(configPath, "utf8")) as { rag?: unknown } | null;
    return normalize(document?.rag);
  } catch (error) {
    // A missing file is the only failure treated as "use the defaults".
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
    return defaultConfig();
  }
};
/**
 * Produces the client-safe view of a RAG config: every field except
 * `authToken`, plus `hasAuthToken` telling whether a non-empty token is set.
 */
export const toPublicRagConfig = (config: RagConfig): PublicRagConfig => {
  const hasAuthToken = config.authToken ? true : false;
  const { authToken: _omitted, ...publicFields } = config;
  return { ...publicFields, hasAuthToken };
};

View File

@@ -0,0 +1,6 @@
import type { FastifyInstance } from "fastify";
import { loadRagConfig, toPublicRagConfig } from "./config.js";
/**
 * Registers `GET /api/rag/config`, which reloads the RAG config on every
 * request and returns its public (token-free) form.
 */
export const registerRagRoutes = async (app: FastifyInstance) => {
  app.get("/api/rag/config", async () => {
    const current = loadRagConfig();
    return toPublicRagConfig(current);
  });
};

View File

@@ -0,0 +1,88 @@
// In-memory token bucket rate limiter keyed by an arbitrary id (user id).
//
// Token bucket semantics:
// - Capacity = `burst` (max tokens the bucket can hold).
// - Refill = `perMinute / 60` tokens per second (lazy: tokens are added
// on `consume` based on elapsed time since the bucket was
// last touched).
// - Each accepted call consumes exactly one token. Calls that find the
// bucket empty are rejected and the caller gets back a Retry-After hint
// in milliseconds.
//
// Stale entries: the map only grows by the number of distinct ids seen.
// For a 5-user MVP this is bounded. Idle buckets are not pruned
// automatically; larger installs can drop them with `reset(id)` (one id) or
// `reset()` (all ids).
import { envNumber } from "./env.js";
/** Settings for `createRateLimiter`. */
export type RateLimiterOptions = {
// Refill rate, in tokens per minute.
perMinute: number;
// Bucket capacity; a new bucket starts with this many tokens.
burst: number;
};
/**
 * Outcome of one `consume` call: either accepted (`remaining` is the whole
 * number of tokens left) or rejected with a wait hint in milliseconds.
 */
export type ConsumeResult =
| { ok: true; remaining: number }
| { ok: false; retryAfterMs: number };
/** Handle returned by `createRateLimiter`. */
export type RateLimiter = {
// Takes one token from the bucket for `id`; `now` defaults to `Date.now()`.
consume(id: string, now?: number): ConsumeResult;
// Number of buckets currently held.
size: () => number;
// Drops the bucket for `id`, or every bucket when `id` is omitted.
reset: (id?: string) => void;
};
/**
 * Builds an in-memory token-bucket limiter with one bucket per id.
 *
 * A bucket is created full (`burst` tokens) the first time its id is seen and
 * is topped up lazily, at `perMinute / 60_000` tokens per millisecond, each
 * time that id is consumed again. Every accepted call costs one token.
 *
 * @param options Refill rate and bucket capacity.
 * @returns `consume`, `size` and `reset` operations over the shared bucket map.
 */
export const createRateLimiter = (
  options: RateLimiterOptions,
): RateLimiter => {
  type Bucket = { tokens: number; lastRefillMs: number };
  const capacity = options.burst;
  const tokensPerMs = options.perMinute / 60_000;
  const table = new Map<string, Bucket>();

  // Returns the bucket for `id`, creating it full or topping it up to `now`.
  const bucketFor = (id: string, now: number): Bucket => {
    const known = table.get(id);
    if (!known) {
      const fresh: Bucket = { tokens: capacity, lastRefillMs: now };
      table.set(id, fresh);
      return fresh;
    }
    // A clock that moved backwards counts as no elapsed time.
    const gained = Math.max(0, now - known.lastRefillMs) * tokensPerMs;
    if (gained > 0) {
      known.tokens = Math.min(capacity, known.tokens + gained);
      known.lastRefillMs = now;
    }
    return known;
  };

  const consume = (id: string, now: number = Date.now()): ConsumeResult => {
    const bucket = bucketFor(id, now);
    if (bucket.tokens >= 1) {
      bucket.tokens -= 1;
      return { ok: true, remaining: Math.floor(bucket.tokens) };
    }
    // Rejected: report how long until one whole token is available.
    const shortfall = 1 - bucket.tokens;
    const retryAfterMs = tokensPerMs > 0 ? Math.ceil(shortfall / tokensPerMs) : 60_000;
    return { ok: false, retryAfterMs };
  };

  const reset = (id?: string): void => {
    if (id !== undefined) {
      table.delete(id);
      return;
    }
    table.clear();
  };

  return { consume, size: () => table.size, reset };
};
/**
 * Limiter for chat requests, configured from CHAT_RATE_LIMIT_PER_MINUTE
 * (default 20) and CHAT_RATE_LIMIT_BURST (default 5); both are floored at 1.
 */
export const chatRateLimiterFromEnv = (): RateLimiter =>
  createRateLimiter({
    perMinute: Math.max(1, envNumber(process.env.CHAT_RATE_LIMIT_PER_MINUTE, 20)),
    burst: Math.max(1, envNumber(process.env.CHAT_RATE_LIMIT_BURST, 5)),
  });
/**
 * Limiter for webhook executions, configured from
 * WEBHOOK_RATE_LIMIT_PER_MINUTE (default 60) and WEBHOOK_RATE_LIMIT_BURST
 * (default 10); both are floored at 1.
 *
 * The defaults are deliberately generous: the aim is to stop a runaway loop,
 * not to throttle a real operator. Limits could be tightened per webhook_id
 * later if a particular hook becomes a hotspot.
 */
export const webhookRateLimiterFromEnv = (): RateLimiter =>
  createRateLimiter({
    perMinute: Math.max(1, envNumber(process.env.WEBHOOK_RATE_LIMIT_PER_MINUTE, 60)),
    burst: Math.max(1, envNumber(process.env.WEBHOOK_RATE_LIMIT_BURST, 10)),
  });

170
apps/api/src/server.ts Normal file
View File

@@ -0,0 +1,170 @@
import Fastify from "fastify";
import cors from "@fastify/cors";
import { ZodError } from "zod";
import { config as loadDotenv } from "dotenv";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
// Load `.env` from the repo root regardless of the cwd the process was
// started from. `import "dotenv/config"` would only look in `process.cwd()`,
// which silently breaks when the API is started from a sub-directory.
//
// NOTE(review): static `import` declarations are hoisted in ES modules, so
// every module imported in this file (including the ones listed below this
// call) is evaluated BEFORE `loadDotenv` runs. A module that reads
// `process.env` at its top level therefore does not see values that exist
// only in `.env`; such reads need to happen inside functions.
const __dirnameApi = dirname(fileURLToPath(import.meta.url));
loadDotenv({ path: resolve(__dirnameApi, "../../../.env") });
import { registerAuthRoutes } from "./auth/routes.js";
import { registerChatRoutes } from "./chat/routes.js";
import { openDatabase } from "./db/database.js";
import { migrate } from "./db/migrate.js";
import { registerDocsRoutes } from "./docs/routes.js";
import { envNumber } from "./env.js";
import { registerMcpRoutes } from "./mcp/routes.js";
import { registerModelRoutes } from "./models/routes.js";
import { registerN8nRoutes } from "./n8n/routes.js";
import { registerRagRoutes } from "./rag/routes.js";
import { registerSessionRoutes } from "./sessions/routes.js";
import { registerSkillRoutes } from "./skills/routes.js";
import { runWebhookAuditPurge, webhookAuditPurgeConfigFromEnv } from "./webhooks/audit.js";
import { registerWebhookRoutes } from "./webhooks/routes.js";
import { observeHttp, renderPrometheusText, snapshotMetrics } from "./metrics.js";
// TCP port passed to `app.listen`; API_PORT, default 8787.
const port = envNumber(process.env.API_PORT, 8787);
// Request body cap handed to Fastify; API_BODY_LIMIT_BYTES, default 1 MiB.
const bodyLimit = envNumber(process.env.API_BODY_LIMIT_BYTES, 1_048_576);
/**
 * Computes the `origin` option for the CORS plugin from CORS_ALLOWED_ORIGINS.
 *
 * @returns `true` when the variable is unset/empty or lists `*`; otherwise the
 *   comma-separated origins, trimmed, with blank entries removed.
 */
const corsOrigin = () => {
  const raw = process.env.CORS_ALLOWED_ORIGINS;
  if (!raw) return true;

  const allowed: string[] = [];
  for (const piece of raw.split(",")) {
    const candidate = piece.trim();
    // A wildcard anywhere in the list opens CORS to every origin.
    if (candidate === "*") return true;
    if (candidate) allowed.push(candidate);
  }
  return allowed;
};
// Fastify instance with its built-in logger enabled and the request body cap
// computed above.
const app = Fastify({
logger: true,
bodyLimit,
});
// Open the database and apply migrations before any route is registered.
const db = openDatabase();
migrate(db);
// `corsOrigin()` is evaluated once, here, so a change to CORS_ALLOWED_ORIGINS
// only takes effect after a restart.
await app.register(cors, {
origin: corsOrigin(),
});
// Observability: track every request by route + method + status with duration.
// The route template (e.g. "/api/sessions/:id") is preferred over the raw URL
// so `/api/sessions/abc` and `/api/sessions/def` aggregate into the same bucket.
//
// `onRequest` stamps the start time on the request object; `onResponse` reads
// it back and reports the elapsed time to the metrics module.
app.addHook("onRequest", async (request) => {
  (request as { sic_startedAt?: number }).sic_startedAt = performance.now();
});
app.addHook("onResponse", async (request, reply) => {
  // NOTE(review): when no route template is available this falls back to the
  // raw URL, which gives unmatched paths one metrics bucket each — confirm
  // that is acceptable for label cardinality.
  const route = request.routeOptions?.url ?? request.url ?? "unknown";
  // The stamp is missing if the reply was produced before the `onRequest`
  // hook above ran; report 0 rather than feeding NaN into the metrics.
  const startedAt = (request as { sic_startedAt?: number }).sic_startedAt;
  const durationMs = startedAt === undefined ? 0 : Math.round(performance.now() - startedAt);
  observeHttp({
    route,
    method: request.method,
    status: reply.statusCode,
    durationMs,
    timestamp: Date.now(),
  });
});
// Adds a fixed set of hardening headers to every response and passes the
// payload through untouched.
app.addHook("onSend", async (_request, reply, payload) => {
  const hardeningHeaders: Array<[string, string]> = [
    ["x-content-type-options", "nosniff"],
    ["referrer-policy", "no-referrer"],
    ["x-frame-options", "DENY"],
  ];
  for (const [name, value] of hardeningHeaders) {
    reply.header(name, value);
  }
  return payload;
});
// Central error mapping, checked in this order:
//   - ZodError                               -> 400 validation_error (with issues)
//   - message starting auth_ / JWT / JWKS    -> 401 unauthorized
//   - error carrying a 4xx `statusCode`      -> that status, request_error
//   - anything else                          -> logged, 500 internal_error
app.setErrorHandler((error, _request, reply) => {
  const message = error instanceof Error ? error.message : String(error);
  if (error instanceof ZodError) {
    return reply.code(400).send({
      error: "validation_error",
      issues: error.issues,
    });
  }
  if (
    message.startsWith("auth_") ||
    message.startsWith("JWT") ||
    message.startsWith("JWKS")
  ) {
    return reply.code(401).send({ error: "unauthorized" });
  }
  // Fastify's own request errors (malformed JSON body, body larger than
  // `bodyLimit`, unsupported media type, ...) carry a 4xx `statusCode`. They
  // are the client's fault and must not be reported as a server failure.
  const statusCode = (error as { statusCode?: unknown } | null | undefined)?.statusCode;
  if (typeof statusCode === "number" && statusCode >= 400 && statusCode < 500) {
    return reply.code(statusCode).send({ error: "request_error", message });
  }
  app.log.error(error);
  return reply.code(500).send({ error: "internal_error" });
});
// Liveness probe: answers without touching any dependency.
app.get("/healthz", async () => {
  return { status: "ok" };
});
// Readiness probe: runs a trivial query first, so a broken database handle
// makes the request fail instead of reporting "ready".
app.get("/readyz", async () => {
  db.prepare("SELECT 1").get();
  return { status: "ready" };
});
app.get("/api/version", async () => {
  return { name: "pi-chat-api", version: "0.1.0" };
});
// Observability surface. `/metrics` serves the Prometheus text format for
// scrapers; `/api/metrics` serves the same data as JSON for humans and the
// smoke test.
app.get("/metrics", async (_request, reply) => {
  reply.header("content-type", "text/plain; version=0.0.4; charset=utf-8");
  const exposition = renderPrometheusText();
  return exposition;
});
app.get("/api/metrics", async () => {
  return snapshotMetrics();
});
// Feature routes are registered one at a time, in this order. Session,
// webhook and chat routes also receive the shared database handle.
await registerSessionRoutes(app, db);
await registerAuthRoutes(app);
await registerDocsRoutes(app);
await registerModelRoutes(app);
await registerRagRoutes(app);
await registerSkillRoutes(app);
await registerN8nRoutes(app);
await registerMcpRoutes(app);
await registerWebhookRoutes(app, db);
await registerChatRoutes(app, db);
// Audit retention: purge once at boot, then repeat on a timer. Each pass is
// cheap and idempotent. A pass is logged only when it removed something.
const auditPurgeConfig = webhookAuditPurgeConfigFromEnv();
const logAuditPurge = (report: ReturnType<typeof runWebhookAuditPurge>, label: string): void => {
  const purgedAnything = report.deletedByAge > 0 || report.deletedByCap > 0;
  if (!purgedAnything) return;
  app.log.info({ ...report, config: auditPurgeConfig }, label);
};
// The boot pass is not guarded: a failure here aborts startup.
const initialPurge = runWebhookAuditPurge(db, auditPurgeConfig);
logAuditPurge(initialPurge, "webhook audit purge (boot)");
// WEBHOOK_AUDIT_PURGE_INTERVAL_MS, default one hour, never below one minute.
const auditPurgeIntervalMs = Math.max(60_000, envNumber(process.env.WEBHOOK_AUDIT_PURGE_INTERVAL_MS, 3_600_000));
const auditPurgeTimer = setInterval(() => {
  // Timer passes only log failures; they never take the process down.
  try {
    logAuditPurge(runWebhookAuditPurge(db, auditPurgeConfig), "webhook audit purge (timer)");
  } catch (error) {
    app.log.error({ err: error }, "webhook audit purge failed");
  }
}, auditPurgeIntervalMs);
// Do not let the purge timer alone keep the process alive.
auditPurgeTimer.unref?.();
// Graceful shutdown: stop the purge timer, close the HTTP server, then close
// the database. The guard makes a second SIGINT/SIGTERM a no-op, so the
// database is never closed twice. The database is closed even when
// `app.close()` fails, and failures are logged instead of surfacing as
// unhandled promise rejections.
let shutdownStarted = false;
const shutdown = async () => {
  if (shutdownStarted) return;
  shutdownStarted = true;
  app.log.info("shutdown requested");
  clearInterval(auditPurgeTimer);
  try {
    await app.close();
  } catch (error) {
    app.log.error({ err: error }, "failed to close HTTP server");
    process.exitCode = 1;
  }
  try {
    db.close();
  } catch (error) {
    app.log.error({ err: error }, "failed to close database");
    process.exitCode = 1;
  }
};
process.on("SIGINT", () => void shutdown());
process.on("SIGTERM", () => void shutdown());
await app.listen({ port, host: "0.0.0.0" });

View File

@@ -0,0 +1,140 @@
import { randomUUID } from "node:crypto";
import type { AppDatabase } from "../db/database.js";
/** One row of `chat_sessions`, as read with `SELECT *`. */
export type ChatSessionRecord = {
// Generated with `randomUUID()` when the session is created.
id: string;
// Owner; every repository query filters on it.
user_id: string;
title: string | null;
// Per-session prompt override; null when none is set.
system_prompt: string | null;
// ISO-8601 strings produced by `Date#toISOString()`.
created_at: string;
updated_at: string;
};
/** One row of `chat_messages`, as read with `SELECT *`. */
export type ChatMessageRecord = {
id: string;
session_id: string;
user_id: string;
role: "user" | "assistant" | "system" | "tool";
content: string;
// JSON-encoded by the repository on insert; null when no metadata was given.
metadata: string | null;
created_at: string;
};
/**
 * Data-access helpers for `chat_sessions`. Every operation is scoped to a
 * user id, so one user can never read or modify another user's sessions.
 * Statements are prepared on each call.
 */
export function createSessionRepository(db: AppDatabase) {
  const isoNow = (): string => new Date().toISOString();

  /** All sessions of the user, most recently updated first. */
  const list = (userId: string): ChatSessionRecord[] => {
    const statement = db.prepare("SELECT * FROM chat_sessions WHERE user_id = ? ORDER BY updated_at DESC");
    return statement.all(userId) as ChatSessionRecord[];
  };

  /** Inserts a new session with no system prompt and returns it. */
  const create = (userId: string, title: string | null): ChatSessionRecord => {
    const stamp = isoNow();
    const session: ChatSessionRecord = {
      id: randomUUID(),
      user_id: userId,
      title,
      system_prompt: null,
      created_at: stamp,
      updated_at: stamp,
    };
    const insert = db.prepare(
      "INSERT INTO chat_sessions (id, user_id, title, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
    );
    insert.run(session.id, session.user_id, session.title, session.created_at, session.updated_at);
    return session;
  };

  /** The user's session with that id, or null when there is none. */
  const get = (userId: string, sessionId: string): ChatSessionRecord | null => {
    const row = db.prepare("SELECT * FROM chat_sessions WHERE id = ? AND user_id = ?").get(sessionId, userId) as
      | ChatSessionRecord
      | undefined;
    return row ?? null;
  };

  /** Deletes one session; true when a row was removed. */
  const remove = (userId: string, sessionId: string): boolean => {
    const outcome = db.prepare("DELETE FROM chat_sessions WHERE id = ? AND user_id = ?").run(sessionId, userId);
    return outcome.changes > 0;
  };

  /** Deletes every session of the user and returns how many were removed. */
  const deleteAllForUser = (userId: string): number => {
    const outcome = db.prepare("DELETE FROM chat_sessions WHERE user_id = ?").run(userId);
    return Number(outcome.changes ?? 0);
  };

  /** Bumps `updated_at` to the current time. */
  const touch = (userId: string, sessionId: string): void => {
    const update = db.prepare("UPDATE chat_sessions SET updated_at = ? WHERE id = ? AND user_id = ?");
    update.run(isoNow(), sessionId, userId);
  };

  /** Sets the title and bumps `updated_at`. */
  const updateTitle = (userId: string, sessionId: string, title: string): void => {
    const update = db.prepare("UPDATE chat_sessions SET title = ?, updated_at = ? WHERE id = ? AND user_id = ?");
    update.run(title, isoNow(), sessionId, userId);
  };

  /**
   * Stores the trimmed prompt, or NULL when it is null or blank.
   * Returns true when a row was updated.
   */
  const updateSystemPrompt = (userId: string, sessionId: string, prompt: string | null): boolean => {
    const trimmed = prompt?.trim() ?? "";
    const normalized = trimmed.length > 0 ? trimmed : null;
    const update = db.prepare(
      "UPDATE chat_sessions SET system_prompt = ?, updated_at = ? WHERE id = ? AND user_id = ?",
    );
    const outcome = update.run(normalized, isoNow(), sessionId, userId);
    return outcome.changes > 0;
  };

  return {
    list,
    create,
    get,
    delete: remove,
    deleteAllForUser,
    touch,
    updateTitle,
    updateSystemPrompt,
  };
}
/**
 * Data-access helpers for `chat_messages`, always scoped to a user id.
 */
export function createMessageRepository(db: AppDatabase) {
  return {
    /**
     * Messages of one session in chronological order.
     *
     * `created_at` has millisecond resolution, so rows written in a tight
     * loop can share a timestamp. `rowid` breaks such ties in insertion
     * order (the audit repository orders `webhook_runs` the same way).
     * Assumes `chat_messages` is an ordinary rowid table — confirm against
     * the migration.
     */
    listForSession(userId: string, sessionId: string): ChatMessageRecord[] {
      return db
        .prepare(
          "SELECT * FROM chat_messages WHERE session_id = ? AND user_id = ? ORDER BY created_at ASC, rowid ASC",
        )
        .all(sessionId, userId) as ChatMessageRecord[];
    },

    /**
     * Inserts one message and returns the stored record.
     *
     * @param input.metadata Any JSON-serializable value; stored as JSON text.
     *   Only `undefined`/`null` (or a value JSON cannot represent) is stored
     *   as NULL, so falsy values such as `0` or `false` are kept.
     * @param input.createdAt Optional ISO timestamp for the row; defaults to
     *   the current time.
     */
    create(input: {
      sessionId: string;
      userId: string;
      role: ChatMessageRecord["role"];
      content: string;
      metadata?: unknown;
      createdAt?: string;
    }): ChatMessageRecord {
      const hasMetadata = input.metadata !== undefined && input.metadata !== null;
      const message: ChatMessageRecord = {
        id: randomUUID(),
        session_id: input.sessionId,
        user_id: input.userId,
        role: input.role,
        content: input.content,
        // JSON.stringify yields undefined for functions/symbols; store NULL then.
        metadata: hasMetadata ? (JSON.stringify(input.metadata) ?? null) : null,
        created_at: input.createdAt ?? new Date().toISOString(),
      };
      db.prepare(
        "INSERT INTO chat_messages (id, session_id, user_id, role, content, metadata, created_at) " +
          "VALUES (?, ?, ?, ?, ?, ?, ?)",
      ).run(
        message.id,
        message.session_id,
        message.user_id,
        message.role,
        message.content,
        message.metadata,
        message.created_at,
      );
      return message;
    },
  };
}

View File

@@ -0,0 +1,172 @@
import type { FastifyInstance } from "fastify";
import { z } from "zod";
import { getAuthUser } from "../auth/index.js";
import type { AppDatabase } from "../db/database.js";
import { createMessageRepository, createSessionRepository } from "./repository.js";
// Body of POST /api/sessions: optional title, 1-120 characters (not trimmed).
const createSessionBody = z.object({
title: z.string().min(1).max(120).optional(),
});
// Body of PATCH /api/sessions/:id: required title, trimmed, 1-120 characters.
const updateSessionBody = z.object({
title: z.string().trim().min(1).max(120),
});
// Body of PATCH /api/sessions/:id/system-prompt.
const updateSystemPromptBody = z.object({
// Empty / whitespace-only strings, null, and an omitted field all clear the
// override: the route passes `system_prompt ?? null` to the repository,
// which stores NULL for null or blank input.
system_prompt: z.string().max(8_000).nullable().optional(),
});
/**
 * Registers the session CRUD, export and import routes. Every handler
 * resolves the caller with `getAuthUser` and only touches that user's rows.
 */
export async function registerSessionRoutes(app: FastifyInstance, db: AppDatabase) {
  const sessions = createSessionRepository(db);
  const messages = createMessageRepository(db);

  // List the caller's sessions.
  app.get("/api/sessions", async (request) => {
    const user = await getAuthUser(request);
    return { items: sessions.list(user.id) };
  });

  // Create a session; the body (and the title in it) is optional.
  app.post("/api/sessions", async (request, reply) => {
    const user = await getAuthUser(request);
    const body = createSessionBody.parse(request.body ?? {});
    const session = sessions.create(user.id, body.title ?? null);
    return reply.code(201).send(session);
  });

  // Fetch one session together with its messages.
  app.get<{ Params: { id: string } }>("/api/sessions/:id", async (request, reply) => {
    const user = await getAuthUser(request);
    const session = sessions.get(user.id, request.params.id);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    return {
      ...session,
      messages: messages.listForSession(user.id, session.id),
    };
  });

  // Rename a session and return the updated row.
  app.patch<{ Params: { id: string } }>("/api/sessions/:id", async (request, reply) => {
    const user = await getAuthUser(request);
    const body = updateSessionBody.parse(request.body);
    const session = sessions.get(user.id, request.params.id);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    sessions.updateTitle(user.id, session.id, body.title);
    return sessions.get(user.id, session.id);
  });

  // Per-session system prompt override. Inserted into the chat stream
  // immediately after the base identity prompt, before the docs/actions
  // context. Use to attach incident-specific context (runbook link, on-call
  // names, severity matrix) without polluting the global prompt.
  // A missing, null or blank `system_prompt` clears the override.
  app.patch<{ Params: { id: string } }>(
    "/api/sessions/:id/system-prompt",
    async (request, reply) => {
      const user = await getAuthUser(request);
      const body = updateSystemPromptBody.parse(request.body ?? {});
      const session = sessions.get(user.id, request.params.id);
      if (!session) {
        return reply.code(404).send({ error: "session_not_found" });
      }
      sessions.updateSystemPrompt(user.id, session.id, body.system_prompt ?? null);
      return sessions.get(user.id, session.id);
    },
  );

  // Delete one session.
  app.delete<{ Params: { id: string } }>("/api/sessions/:id", async (request, reply) => {
    const user = await getAuthUser(request);
    const deleted = sessions.delete(user.id, request.params.id);
    if (!deleted) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    return reply.code(204).send();
  });

  // Bulk delete: wipes every session owned by the current user. Cascade
  // removes the messages and webhook_runs that point at them. The frontend
  // requires the user to type the literal word "delete" before this fires.
  app.delete("/api/sessions", async (request, reply) => {
    const user = await getAuthUser(request);
    const removed = sessions.deleteAllForUser(user.id);
    return reply.code(200).send({ deleted: removed });
  });

  // Export: returns a JSON document with the session metadata and all its
  // messages. The shape is a stable contract so `POST /api/sessions/import`
  // can read it back. Messages are emitted as stored, so `metadata` is a
  // JSON string or null. webhook_runs are intentionally excluded from the
  // export — those are operational audit data, not conversation content.
  app.get<{ Params: { id: string } }>("/api/sessions/:id/export", async (request, reply) => {
    const user = await getAuthUser(request);
    const session = sessions.get(user.id, request.params.id);
    if (!session) {
      return reply.code(404).send({ error: "session_not_found" });
    }
    return {
      version: 1,
      exported_at: new Date().toISOString(),
      session: {
        id: session.id,
        title: session.title,
        created_at: session.created_at,
        updated_at: session.updated_at,
      },
      messages: messages.listForSession(user.id, session.id),
    };
  });

  // Message metadata as accepted by the import route. The export emits it
  // exactly as stored (a JSON string, or null), so those forms must be
  // accepted alongside a plain object; otherwise an exported document is
  // rejected on import. Strings are decoded here so they are not
  // JSON-encoded a second time when the message is written.
  const importedMetadata = z
    .union([z.record(z.unknown()), z.string(), z.null()])
    .optional()
    .transform((value, ctx): unknown => {
      if (typeof value !== "string") return value ?? undefined;
      try {
        const decoded: unknown = JSON.parse(value);
        return decoded ?? undefined;
      } catch {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message: "metadata must be an object or a JSON-encoded string",
        });
        return undefined;
      }
    });

  // Import: accepts the export document above, creates a new session owned
  // by the caller, and writes the messages with fresh ids. Returns the new
  // session and a count of imported messages. Unknown fields in the
  // document (ids, version, ...) are ignored.
  // NOTE(review): "tool" messages can appear in an export but are not
  // accepted here — confirm whether that exclusion is intentional.
  const importSessionBody = z.object({
    session: z.object({
      title: z.string().max(120).nullable().optional(),
      created_at: z.string().optional(),
      updated_at: z.string().optional(),
    }),
    messages: z.array(
      z.object({
        role: z.enum(["user", "assistant", "system"]),
        content: z.string().min(1).max(50_000),
        metadata: importedMetadata,
        // Accepted so exported documents validate, but not applied: each
        // imported message is stamped with the time of the import.
        created_at: z.string().optional(),
      }),
    ),
  });

  app.post("/api/sessions/import", async (request, reply) => {
    const user = await getAuthUser(request);
    const body = importSessionBody.parse(request.body);
    const newSession = sessions.create(user.id, body.session.title ?? null);
    let imported = 0;
    for (const message of body.messages) {
      messages.create({
        sessionId: newSession.id,
        userId: user.id,
        role: message.role,
        content: message.content,
        metadata: message.metadata,
      });
      imported += 1;
    }
    sessions.touch(user.id, newSession.id);
    return reply.code(201).send({
      session: newSession,
      imported_messages: imported,
    });
  });
}

View File

@@ -0,0 +1,57 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { parse } from "yaml";
import { envString } from "../env.js";
/** One skill entry as loaded from the skills YAML file. */
export type SkillDefinition = {
// Entries whose id is empty after trimming are dropped by the loader.
id: string;
name: string;
description: string;
enabled: boolean;
// Returned by `getEnabledSkillPrompts` for enabled skills; omitted from the public view.
prompt: string;
};
/** Skill as exposed over the API: everything except the prompt text. */
export type PublicSkillDefinition = Omit<SkillDefinition, "prompt">;
// Expected top-level shape of the YAML document.
type SkillsFile = { skills?: SkillDefinition[] };
/**
 * Location of the skills config. SKILLS_CONFIG_PATH takes precedence so tests
 * and docker setups can override it; otherwise the file is expected at
 * `../../config/skills.yml` relative to the working directory (the layout
 * when the API is started from apps/api).
 */
const defaultPath = (): string => {
  const bundledPath = resolve(process.cwd(), "../../config/skills.yml");
  return envString(process.env.SKILLS_CONFIG_PATH, bundledPath);
};
/**
 * Loads the skill catalogue from YAML.
 *
 * @param configPath File to read; defaults to `defaultPath()`.
 * @returns Sanitized skills (trimmed strings, boolean `enabled`). Non-object
 *   entries and entries without an id are skipped. A missing file or a
 *   document without a `skills` list yields an empty array.
 * @throws Any read/parse error other than "file not found".
 */
export const loadSkillDefinitions = (
  configPath: string = defaultPath(),
): SkillDefinition[] => {
  try {
    const document = parse(readFileSync(configPath, "utf8")) as SkillsFile;
    if (!document || !Array.isArray(document.skills)) return [];

    const definitions: SkillDefinition[] = [];
    for (const entry of document.skills) {
      if (!entry || typeof entry !== "object") continue;
      const cleaned: SkillDefinition = {
        id: String(entry.id ?? "").trim(),
        name: String(entry.name ?? "").trim(),
        description: String(entry.description ?? "").trim(),
        enabled: Boolean(entry.enabled),
        prompt: String(entry.prompt ?? "").trim(),
      };
      if (cleaned.id.length > 0) definitions.push(cleaned);
    }
    return definitions;
  } catch (error) {
    // The config is optional: a missing file is fine, a malformed one must surface.
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
    return [];
  }
};
/**
 * Collects the prompt text of every enabled skill that has a non-empty
 * prompt, in catalogue order.
 *
 * @param skills Catalogue to scan; loaded from disk when omitted.
 */
export const getEnabledSkillPrompts = (skills: SkillDefinition[] = loadSkillDefinitions()): string[] => {
  const prompts: string[] = [];
  for (const skill of skills) {
    if (skill.enabled && skill.prompt.length > 0) prompts.push(skill.prompt);
  }
  return prompts;
};
/** Client-safe view of a skill: id, name, description and enabled flag only. */
export const toPublicSkill = (skill: SkillDefinition): PublicSkillDefinition => {
  const { id, name, description, enabled } = skill;
  return { id, name, description, enabled };
};

View File

@@ -0,0 +1,9 @@
import type { FastifyInstance } from "fastify";
import { loadSkillDefinitions, toPublicSkill } from "./config.js";
/**
 * Registers `GET /api/skills`, which reloads the skill catalogue on every
 * request and returns it without the prompt texts.
 */
export const registerSkillRoutes = async (app: FastifyInstance) => {
  app.get("/api/skills", async () => {
    const catalogue = loadSkillDefinitions();
    return { items: catalogue.map(toPublicSkill) };
  });
};

View File

@@ -0,0 +1,162 @@
import { randomUUID } from "node:crypto";
import type { AppDatabase } from "../db/database.js";
import { envNumber } from "../env.js";
/** Final outcome recorded for a webhook execution. */
export type WebhookRunStatus = "success" | "error";
/** One row of the `webhook_runs` audit table. */
export type WebhookRunRecord = {
id: string;
webhook_id: string;
user_id: string;
session_id: string;
status: WebhookRunStatus;
// JSON text of the payload passed to `create`; null when none was given.
request_payload: string | null;
// null when `create` was not given a response status.
response_status: number | null;
// Defaults to 1 when `create` is not told otherwise.
attempts: number;
// ISO-8601 string; defaults to the insertion time.
created_at: string;
};
/**
 * Data-access helpers for the `webhook_runs` audit table: recording runs,
 * listing them per session, purging old rows and computing usage statistics.
 * Statements are prepared on each call.
 */
export const createWebhookAuditRepository = (db: AppDatabase) => {
  /** Inserts one audit row and returns the stored record. */
  const create = (input: {
    webhookId: string;
    userId: string;
    sessionId: string;
    status: WebhookRunStatus;
    requestPayload?: unknown;
    responseStatus?: number | null;
    attempts?: number;
    createdAt?: string;
  }) => {
    const serializedPayload = input.requestPayload ? JSON.stringify(input.requestPayload) : null;
    const run = {
      id: randomUUID(),
      webhook_id: input.webhookId,
      user_id: input.userId,
      session_id: input.sessionId,
      status: input.status,
      request_payload: serializedPayload,
      response_status: input.responseStatus ?? null,
      attempts: input.attempts ?? 1,
      created_at: input.createdAt ?? new Date().toISOString(),
    };
    // The record's keys double as the named SQL parameters.
    const insert = db.prepare(
      `INSERT INTO webhook_runs (id, webhook_id, user_id, session_id, status, request_payload, response_status, attempts, created_at)
VALUES (@id, @webhook_id, @user_id, @session_id, @status, @request_payload, @response_status, @attempts, @created_at)`,
    );
    insert.run(run);
    return run;
  };

  /** Most recent runs of one session for one user, newest first. */
  const listForSession = (userId: string, sessionId: string, limit = 20) => {
    const select = db.prepare(
      `SELECT id, webhook_id, user_id, session_id, status, request_payload, response_status, attempts, created_at
FROM webhook_runs
WHERE session_id = ? AND user_id = ?
ORDER BY created_at DESC
LIMIT ?`,
    );
    return select.all(sessionId, userId, limit) as WebhookRunRecord[];
  };

  /** Deletes rows created before the cutoff; returns the number removed. */
  const purgeOlderThan = (isoCutoff: string): number => {
    const outcome = db.prepare(`DELETE FROM webhook_runs WHERE created_at < ?`).run(isoCutoff);
    return Number(outcome.changes ?? 0);
  };

  /**
   * Keeps only the newest `maxPerUser` rows of every user; returns the number
   * removed. A cap of zero or less disables the pass.
   */
  const enforcePerUserCap = (maxPerUser: number): number => {
    if (maxPerUser <= 0) return 0;
    // Relies on SQLite window functions (3.25+): rows are ranked per user,
    // newest first, and everything ranked beyond the cap is deleted.
    const trim = db.prepare(
      `DELETE FROM webhook_runs
WHERE rowid IN (
SELECT rowid FROM (
SELECT rowid,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC, rowid DESC) AS rn
FROM webhook_runs
)
WHERE rn > ?
)`,
    );
    const outcome = trim.run(maxPerUser);
    return Number(outcome.changes ?? 0);
  };

  /**
   * Per-webhook usage stats for a single user over a recent time window.
   * `isoSince` should be a UTC ISO string (e.g. now - 7 days).
   * Returns a map of webhook_id -> { runs, successes, successRate }.
   */
  const usageForUserSince = (
    isoSince: string,
    userId: string,
  ): Record<string, { runs: number; successes: number; successRate: number }> => {
    const rows = db
      .prepare(
        `SELECT webhook_id,
COUNT(*) AS runs,
SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) AS successes
FROM webhook_runs
WHERE user_id = ? AND created_at >= ?
GROUP BY webhook_id`,
      )
      .all(userId, isoSince) as Array<{
      webhook_id: string;
      runs: number;
      successes: number | null;
    }>;
    return rows.reduce<Record<string, { runs: number; successes: number; successRate: number }>>(
      (stats, row) => {
        const runs = Number(row.runs ?? 0);
        const successes = Number(row.successes ?? 0);
        stats[row.webhook_id] = { runs, successes, successRate: runs > 0 ? successes / runs : 0 };
        return stats;
      },
      {},
    );
  };

  return { create, listForSession, purgeOlderThan, enforcePerUserCap, usageForUserSince };
};
/** Retention settings for the audit table; a value of 0 disables that pass. */
export type WebhookAuditPurgeConfig = {
  retentionDays: number;
  maxPerUser: number;
};
/**
 * Reads the retention settings from WEBHOOK_RUNS_RETENTION_DAYS (default 30)
 * and WEBHOOK_RUNS_MAX_PER_USER (default 1000). Negative values are raised to 0.
 */
export const webhookAuditPurgeConfigFromEnv = (): WebhookAuditPurgeConfig => ({
  retentionDays: Math.max(0, envNumber(process.env.WEBHOOK_RUNS_RETENTION_DAYS, 30)),
  maxPerUser: Math.max(0, envNumber(process.env.WEBHOOK_RUNS_MAX_PER_USER, 1000)),
});
/** Result of one purge run; `cutoff` is null when the age pass was skipped. */
export type WebhookAuditPurgeReport = {
  deletedByAge: number;
  deletedByCap: number;
  cutoff: string | null;
};
/**
 * Applies both retention passes to the audit table: first rows older than
 * `retentionDays`, then rows beyond the per-user cap. A pass whose setting is
 * 0 is skipped. Returns a small report for logging / health endpoints. Safe
 * to call on every boot and on a timer.
 */
export const runWebhookAuditPurge = (
  db: AppDatabase,
  config: WebhookAuditPurgeConfig = webhookAuditPurgeConfigFromEnv(),
): WebhookAuditPurgeReport => {
  const audit = createWebhookAuditRepository(db);
  const report: WebhookAuditPurgeReport = { deletedByAge: 0, deletedByCap: 0, cutoff: null };
  if (config.retentionDays > 0) {
    // 86_400_000 ms = one day.
    const cutoff = new Date(Date.now() - config.retentionDays * 86_400_000).toISOString();
    report.cutoff = cutoff;
    report.deletedByAge = audit.purgeOlderThan(cutoff);
  }
  if (config.maxPerUser > 0) {
    report.deletedByCap = audit.enforcePerUserCap(config.maxPerUser);
  }
  return report;
};

View File

@@ -0,0 +1,53 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import YAML from "yaml";
import { envString } from "../env.js";
/** HTTP methods a webhook definition may declare. */
export type WebhookMethod = "GET" | "POST" | "PUT" | "PATCH" | "DELETE";
/** One catalogue entry after loading (method upper-cased, url env-expanded). */
export type WebhookDefinition = {
id: string;
label: string;
description?: string;
method: WebhookMethod;
// Target URL after `${VAR}` expansion; withheld from the public view.
url: string;
// `canUseWebhook` requires the user to hold every role listed here.
required_roles: string[];
// Defaults to true when the YAML omits it.
confirmation_required: boolean;
// Withheld from the public view.
payload_template?: unknown;
};
/** Catalogue entry as exposed over the API: no URL, no payload template. */
export type PublicWebhookDefinition = Omit<WebhookDefinition, "url" | "payload_template">;
/** Default catalogue location: `../../config/webhooks.yml` relative to the working directory. */
const defaultConfigPath = () => {
  const workingDir = process.cwd();
  return resolve(workingDir, "../../config/webhooks.yml");
};
/**
 * Replaces every `${NAME}` placeholder with the value of the environment
 * variable NAME (upper-case letters, digits, underscores). Unset variables
 * expand to the empty string.
 */
const expandEnv = (value: string) => {
  const lookup = (_placeholder: string, variable: string): string => process.env[variable] ?? "";
  return value.replace(/\$\{([A-Z0-9_]+)\}/g, lookup);
};
/**
 * Loads the webhook catalogue from YAML and normalizes every entry.
 *
 * Normalization is defensive, because the file is hand-edited:
 * - non-object entries and entries without an id are skipped;
 * - `id` is coerced to a string (a numeric YAML id would otherwise never
 *   match the string id taken from the route);
 * - a missing `method` defaults to POST and a missing `url` to "" instead of
 *   throwing a TypeError (callers already treat an empty URL as "not
 *   configured"); NOTE(review): confirm POST is the intended default;
 * - `required_roles` must be a list when present. Anything else is rejected
 *   rather than silently treated as "no roles required".
 *
 * @param configPath File to read; WEBHOOKS_CONFIG_PATH when set and
 *   non-empty, otherwise the default path.
 * @throws When the file cannot be read or parsed (including a missing file),
 *   or when an entry has a malformed `required_roles`.
 */
export const loadWebhookDefinitions = (
  configPath = process.env.WEBHOOKS_CONFIG_PATH && process.env.WEBHOOKS_CONFIG_PATH.length > 0
    ? process.env.WEBHOOKS_CONFIG_PATH
    : defaultConfigPath(),
): WebhookDefinition[] => {
  const raw = readFileSync(configPath, "utf8");
  const parsed = YAML.parse(raw) as { webhooks?: unknown } | null | undefined;
  const listed: unknown = parsed?.webhooks;
  const entries: unknown[] = Array.isArray(listed) ? listed : [];

  const definitions: WebhookDefinition[] = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") continue;
    const webhook = entry as Partial<WebhookDefinition>;

    const id = String(webhook.id ?? "").trim();
    if (id.length === 0) continue;

    const roles: unknown = webhook.required_roles ?? [];
    if (!Array.isArray(roles)) {
      // Fail closed: a malformed role list must never widen access.
      throw new Error(`webhook "${id}": required_roles must be a list`);
    }

    definitions.push({
      ...webhook,
      id,
      label: String(webhook.label ?? id),
      method: String(webhook.method ?? "POST").toUpperCase() as WebhookMethod,
      url: expandEnv(String(webhook.url ?? "")),
      required_roles: roles.map(String),
      confirmation_required: webhook.confirmation_required ?? true,
    });
  }
  return definitions;
};
/**
 * True when the user holds every role the webhook requires. A webhook with
 * no required roles is usable by anyone.
 */
export const canUseWebhook = (userRoles: string[], webhook: WebhookDefinition) => {
  const lacksRole = (role: string): boolean => !userRoles.includes(role);
  return !webhook.required_roles.some(lacksRole);
};
/**
 * Client-safe view of a webhook: the target URL and the payload template are
 * left out.
 */
export const toPublicWebhook = (webhook: WebhookDefinition): PublicWebhookDefinition => {
  const { id, label, description, method, required_roles, confirmation_required } = webhook;
  return { id, label, description, method, required_roles, confirmation_required };
};

View File

@@ -0,0 +1,288 @@
import type { AuthUser } from "@pi-chat/shared";
import type { FastifyInstance } from "fastify";
import { z } from "zod";
import type { AppDatabase } from "../db/database.js";
import { getAuthUser } from "../auth/index.js";
import { webhookRateLimiterFromEnv } from "../rate-limit.js";
import { createSessionRepository } from "../sessions/repository.js";
import { createWebhookAuditRepository } from "./audit.js";
import { canUseWebhook, loadWebhookDefinitions, toPublicWebhook } from "./config.js";
import { envNumber } from "../env.js";
/**
 * Body of POST /api/webhooks/:id/run.
 *
 * - `confirmed` must be literally `true`.
 * - `lastUserMessage` is capped at CHAT_MESSAGE_MAX_CHARS (default 8000).
 *   The limit is read when a body is validated, not when this module is
 *   evaluated: ES imports are hoisted, so this module is evaluated before the
 *   server entry point has loaded `.env`, and a limit captured here would
 *   ignore a value that is set only in that file.
 * - `payload` holds extra fields; defaults to `{}`.
 */
const runWebhookBody = z.object({
  sessionId: z.string().min(1),
  confirmed: z.literal(true),
  lastUserMessage: z
    .string()
    .refine((value) => value.length <= envNumber(process.env.CHAT_MESSAGE_MAX_CHARS, 8_000), {
      message: "lastUserMessage exceeds CHAT_MESSAGE_MAX_CHARS",
    })
    .default(""),
  payload: z.record(z.unknown()).default({}),
});
// Query string of GET /api/webhook-runs: the session to list runs for and an
// optional page size (coerced from the query string, 1-50, default 20).
const webhookRunsQuery = z.object({
sessionId: z.string().min(1),
limit: z.coerce.number().int().min(1).max(50).default(20),
});
/**
 * Recursively renders a payload template against a context object.
 *
 * - Strings: each `{{a.b.c}}` placeholder is replaced by the value found by
 *   walking that dotted path through `context`. A path that cannot be
 *   followed, or that ends in null/undefined, renders as an empty string;
 *   anything else is converted with `String`.
 * - Arrays and plain objects: rendered element by element / value by value.
 * - Every other value is returned unchanged.
 */
const renderTemplate = (template: unknown, context: Record<string, unknown>): unknown => {
  if (typeof template === "string") {
    const resolvePath = (_placeholder: string, path: string): string => {
      let cursor: unknown = context;
      for (const key of path.split(".")) {
        // Once the walk leaves the object graph the result stays "".
        cursor =
          cursor && typeof cursor === "object" && key in cursor
            ? (cursor as Record<string, unknown>)[key]
            : "";
      }
      return cursor == null ? "" : String(cursor);
    };
    return template.replace(/\{\{([a-zA-Z0-9_.]+)\}\}/g, resolvePath);
  }

  if (Array.isArray(template)) {
    return template.map((element) => renderTemplate(element, context));
  }

  if (!template || typeof template !== "object") {
    return template;
  }

  const renderedPairs = Object.entries(template).map(
    ([key, value]): [string, unknown] => [key, renderTemplate(value, context)],
  );
  return Object.fromEntries(renderedPairs);
};
/**
 * Builds the request payload for a webhook run.
 *
 * The template is rendered with `user`, `session.id` and
 * `chat.last_user_message` available to placeholders. If the rendered result
 * is a plain object, its fields form the base of the payload; any other
 * result contributes nothing. Fields from the request's own `payload` are
 * applied last and win on conflict.
 */
const buildPayload = (template: unknown, input: z.infer<typeof runWebhookBody>, user: AuthUser) => {
  const context = {
    user,
    session: { id: input.sessionId },
    chat: { last_user_message: input.lastUserMessage },
  };
  const rendered = renderTemplate(template ?? {}, context);
  const base = rendered && typeof rendered === "object" && !Array.isArray(rendered) ? rendered : {};
  return { ...base, ...input.payload };
};
/**
 * `fetch` with a deadline: the request is aborted if it has not settled
 * within `timeoutMs`. The timer is cleared as soon as the fetch promise
 * settles, whether it resolved or rejected.
 */
const fetchWithTimeout = async (url: string, init: RequestInit, timeoutMs: number) => {
  const deadline = new AbortController();
  const timer = setTimeout(() => deadline.abort(), timeoutMs);
  try {
    return await fetch(url, { ...init, signal: deadline.signal });
  } finally {
    clearTimeout(timer);
  }
};
/** Resolves after roughly `ms` milliseconds. */
const sleep = (ms: number) => {
  const pause = new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
  return pause;
};
/** A response is worth retrying when it is 429 (rate limited) or any status of 500 and above. */
const isRetryableStatus = (status: number) => {
  if (status === 429) return true;
  return status >= 500;
};
/** Retry/backoff settings for one webhook execution. */
type RetryPolicy = {
  maxAttempts: number;
  initialBackoffMs: number;
  maxBackoffMs: number;
  timeoutMs: number;
};
/**
 * Reads the retry policy from the environment:
 * - WEBHOOK_RETRY_MAX_ATTEMPTS (default 3, at least 1)
 * - WEBHOOK_RETRY_INITIAL_BACKOFF_MS (default 500, at least 0)
 * - WEBHOOK_RETRY_MAX_BACKOFF_MS (default 5000, never below the initial backoff)
 * - WEBHOOK_TIMEOUT_MS (default 15000, at least 1)
 */
const retryPolicyFromEnv = (): RetryPolicy => {
  const initialBackoffMs = Math.max(0, envNumber(process.env.WEBHOOK_RETRY_INITIAL_BACKOFF_MS, 500));
  return {
    maxAttempts: Math.max(1, envNumber(process.env.WEBHOOK_RETRY_MAX_ATTEMPTS, 3)),
    initialBackoffMs,
    maxBackoffMs: Math.max(initialBackoffMs, envNumber(process.env.WEBHOOK_RETRY_MAX_BACKOFF_MS, 5_000)),
    timeoutMs: Math.max(1, envNumber(process.env.WEBHOOK_TIMEOUT_MS, 15_000)),
  };
};
/**
 * Result of `runWithRetry`.
 * - `response`: the last response received, or null when the final attempt
 *   failed without one.
 * - `attempts`: number of attempts made.
 * - `lastError`: the error of the final attempt, or null when that attempt
 *   produced a response.
 */
type RunOutcome = {
  response: Response | null;
  attempts: number;
  lastError: unknown;
};
/**
 * Calls the URL up to `policy.maxAttempts` times.
 *
 * A 2xx response, or a non-retryable status (4xx other than 429), ends the
 * loop at once. Thrown errors (network failure, timeout) and retryable
 * statuses (429, 5xx) are retried after an exponential backoff that doubles
 * per attempt from `initialBackoffMs` and is capped at `maxBackoffMs`.
 *
 * @returns The outcome of the last attempt; never throws for request failures.
 */
const runWithRetry = async (url: string, init: RequestInit, policy: RetryPolicy): Promise<RunOutcome> => {
  let lastError: unknown = null;
  let response: Response | null = null;
  for (let attempt = 1; attempt <= policy.maxAttempts; attempt++) {
    try {
      response = await fetchWithTimeout(url, init, policy.timeoutMs);
      // A response arrived, so an error left over from an earlier attempt is
      // stale and must not be reported alongside this response.
      lastError = null;
      if (response.ok || !isRetryableStatus(response.status)) {
        // Success, or a 4xx (non-429): don't retry, surface as-is.
        return { response, attempts: attempt, lastError: null };
      }
    } catch (error) {
      lastError = error;
      response = null;
    }
    if (attempt < policy.maxAttempts) {
      // The body of a response that is about to be retried is never read;
      // cancel it so the underlying connection is released. The response of
      // the final attempt is left intact for the caller.
      if (response?.body) {
        await response.body.cancel().catch(() => undefined);
      }
      const backoff = Math.min(
        policy.maxBackoffMs,
        policy.initialBackoffMs * 2 ** (attempt - 1),
      );
      await sleep(backoff);
    }
  }
  return { response, attempts: policy.maxAttempts, lastError };
};
/**
 * Registers the webhook HTTP routes on the Fastify instance:
 * - GET  /api/webhooks          webhooks the caller's roles may use
 * - GET  /api/webhooks/usage    per-webhook run statistics for the caller
 * - GET  /api/webhook-runs      audit rows for one of the caller's sessions
 * - POST /api/webhooks/:id/run  executes a webhook and records an audit row
 *
 * @param app Fastify instance to attach the routes to.
 * @param db  Database handle used to build the session and audit repositories.
 */
export const registerWebhookRoutes = async (app: FastifyInstance, db: AppDatabase) => {
const sessions = createSessionRepository(db);
const audit = createWebhookAuditRepository(db);
const webhookRateLimiter = webhookRateLimiterFromEnv();
// Catalog listing, filtered by role and mapped through toPublicWebhook.
app.get("/api/webhooks", async (request) => {
const user = await getAuthUser(request);
const items = loadWebhookDefinitions()
.filter((webhook) => canUseWebhook(user.roles, webhook))
.map(toPublicWebhook);
return { items };
});
// Usage statistics for the calling user over the last `days` days (1-365, default 7).
app.get("/api/webhooks/usage", async (request) => {
const user = await getAuthUser(request);
const query = z
.object({ days: z.coerce.number().int().min(1).max(365).default(7) })
.parse(request.query ?? {});
// 86_400_000 ms = one day.
const since = new Date(Date.now() - query.days * 86_400_000).toISOString();
const usage = audit.usageForUserSince(since, user.id);
const items = Object.entries(usage).map(([webhook_id, stats]) => ({
webhook_id,
runs: stats.runs,
successes: stats.successes,
success_rate: stats.successRate,
window_days: query.days,
}));
return { window_days: query.days, items };
});
// Audit rows for one session; responds 404 when sessions.get finds nothing for this user.
app.get("/api/webhook-runs", async (request, reply) => {
const user = await getAuthUser(request);
const query = webhookRunsQuery.parse(request.query);
const session = sessions.get(user.id, query.sessionId);
if (!session) {
return reply.code(404).send({ error: "session_not_found" });
}
// Only the fields listed below are returned for each run.
const items = audit.listForSession(user.id, query.sessionId, query.limit).map((run) => ({
id: run.id,
webhook_id: run.webhook_id,
session_id: run.session_id,
status: run.status,
response_status: run.response_status,
attempts: run.attempts,
created_at: run.created_at,
}));
return { items };
});
// Executes a webhook on behalf of the caller.
// NOTE(review): `webhook.confirmation_required` is not consulted anywhere in
// this handler; confirm that another layer (e.g. the runWebhookBody schema)
// enforces confirmation server-side.
app.post("/api/webhooks/:id/run", async (request, reply) => {
const user = await getAuthUser(request);
const params = z.object({ id: z.string().min(1) }).parse(request.params);
const body = runWebhookBody.parse(request.body);
const webhook = loadWebhookDefinitions().find((item) => item.id === params.id);
// Unknown id and insufficient role produce the same 404 response.
if (!webhook || !canUseWebhook(user.roles, webhook)) {
return reply.code(404).send({ error: "webhook_not_found" });
}
// Per-webhook abuse detection: each webhook_id has its own bucket so a
// runaway loop on one hook doesn't starve the rest. The bucket is shared
// across all users on purpose — that's the abuse signal.
const decision = webhookRateLimiter.consume(webhook.id);
if (!decision.ok) {
// Retry-After is expressed in whole seconds, never less than 1.
const retryAfterSec = Math.max(1, Math.ceil(decision.retryAfterMs / 1000));
app.log.warn(
{ webhook: webhook.id, user: user.id, retryAfterSec },
"webhook rate limit exceeded",
);
return reply
.code(429)
.header("retry-after", String(retryAfterSec))
.header("x-ratelimit-remaining", "0")
.send({
error: "rate_limited",
retry_after_ms: decision.retryAfterMs,
});
}
reply.header("x-ratelimit-remaining", String(decision.remaining));
// The session check runs after the rate-limit token has been consumed.
const session = sessions.get(user.id, body.sessionId);
if (!session) {
return reply.code(404).send({ error: "session_not_found" });
}
const requestPayload = buildPayload(webhook.payload_template, body, user);
// A definition without a URL cannot be executed: audit it as an error with zero attempts.
if (!webhook.url) {
audit.create({
webhookId: webhook.id,
userId: user.id,
sessionId: body.sessionId,
status: "error",
requestPayload,
attempts: 0,
});
return reply.code(500).send({ error: "webhook_not_configured" });
}
const policy = retryPolicyFromEnv();
const outcome = await runWithRetry(
webhook.url,
{
method: webhook.method,
headers: { "content-type": "application/json" },
// GET requests are sent without a body.
body: webhook.method === "GET" ? undefined : JSON.stringify(requestPayload),
},
policy,
);
if (outcome.lastError) {
app.log.error({ err: outcome.lastError, webhook: webhook.id, attempts: outcome.attempts }, "webhook request failed after retries");
} else if (outcome.attempts > 1 && outcome.response) {
app.log.warn(
{ webhook: webhook.id, attempts: outcome.attempts, status: outcome.response.status },
"webhook request retried",
);
}
const response = outcome.response;
const httpOk = response?.ok ?? false;
// No response at all means the request never completed.
const isTransportError = !response;
// Every execution is audited, successful or not.
const run = audit.create({
webhookId: webhook.id,
userId: user.id,
sessionId: body.sessionId,
status: httpOk ? "success" : "error",
requestPayload,
responseStatus: response?.status ?? null,
attempts: outcome.attempts,
});
if (isTransportError) {
return reply.code(502).send({
id: run.id,
webhook_id: run.webhook_id,
status: run.status,
response_status: run.response_status,
attempts: run.attempts,
error: "webhook_request_failed",
});
}
// A non-OK upstream status is reported to the client as 502.
return reply.code(httpOk ? 200 : 502).send({
id: run.id,
webhook_id: run.webhook_id,
status: run.status,
response_status: run.response_status,
attempts: run.attempts,
});
});
};

View File

@@ -0,0 +1,151 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { existsSync, mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import Database from "better-sqlite3";
import type { AppDatabase } from "../src/db/database.js";
import { migrate } from "../src/db/migrate.js";
import { createSessionRepository, createMessageRepository } from "../src/sessions/repository.js";
import { createWebhookAuditRepository } from "../src/webhooks/audit.js";
import { runWebhookAuditPurge } from "../src/webhooks/audit.js";
// Per-test SQLite handle and the path of its backing file.
let db: AppDatabase;
let dbPath: string;

// Every test gets a freshly migrated database inside its own scratch directory.
beforeEach(() => {
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const scratchDir = join(tmpdir(), `sic-test-${uniqueSuffix}`);
  mkdirSync(scratchDir, { recursive: true });

  dbPath = join(scratchDir, "test.db");
  db = new Database(dbPath);
  // Cascading deletes are only applied when foreign keys are switched on.
  db.pragma("foreign_keys = ON");
  migrate(db);
});
// Close the handle and remove the whole per-test scratch directory. Deleting
// only the database file would leave one empty directory (and any files
// SQLite created beside it) in the OS temp dir after every test.
afterEach(() => {
  db.close();
  // dbPath is "<scratchDir>/test.db"; joining ".." normalizes to the directory.
  const scratchDir = join(dbPath, "..");
  if (existsSync(scratchDir)) rmSync(scratchDir, { recursive: true, force: true });
});
// Repository-level checks that one user's sessions and messages are never
// visible to, or mutable by, another user.
describe("session isolation", () => {
  it("never returns sessions/messages from another user", () => {
    const sessions = createSessionRepository(db);
    const messages = createMessageRepository(db);
    const a = sessions.create("user-a", "A session");
    const b = sessions.create("user-b", "B session");
    messages.create({ sessionId: a.id, userId: "user-a", role: "user", content: "a msg" });
    messages.create({ sessionId: b.id, userId: "user-b", role: "user", content: "b msg" });

    // List filter
    expect(sessions.list("user-a").map((s) => s.id)).toEqual([a.id]);
    expect(sessions.list("user-b").map((s) => s.id)).toEqual([b.id]);

    // get() requires matching user_id
    expect(sessions.get("user-a", b.id)).toBeFalsy();
    expect(sessions.get("user-b", a.id)).toBeFalsy();

    // Messages filter by both session_id and user_id
    const bMessages = messages.listForSession("user-a", b.id);
    expect(bMessages).toEqual([]);
    const aMessages = messages.listForSession("user-a", a.id);
    expect(aMessages).toHaveLength(1);
    expect(aMessages[0]?.content).toBe("a msg");
  });

  it("delete cascades to messages", () => {
    const sessions = createSessionRepository(db);
    const messages = createMessageRepository(db);
    const s = sessions.create("user-a", null);
    const m = messages.create({
      sessionId: s.id,
      userId: "user-a",
      role: "user",
      content: "will be cascaded",
    });

    sessions.delete("user-a", s.id);
    expect(messages.listForSession("user-a", s.id)).toEqual([]);

    // Direct DB check that the message row is gone (not just hidden)
    const row = db.prepare("SELECT id FROM chat_messages WHERE id = ?").get(m.id);
    expect(row).toBeUndefined();
  });

  it("updateTitle only affects the owner's session", () => {
    const sessions = createSessionRepository(db);
    const a = sessions.create("user-a", "Old");

    // The owner can rename the session.
    sessions.updateTitle("user-a", a.id, "New");
    expect(sessions.get("user-a", a.id)?.title).toBe("New");

    // A different user must not be able to rename it. Without this step the
    // test would pass even if updateTitle ignored the user id entirely.
    try {
      sessions.updateTitle("user-b", a.id, "Hijacked");
    } catch {
      // Rejecting the call outright is an equally valid way to enforce ownership.
    }
    expect(sessions.get("user-a", a.id)?.title).toBe("New");
  });
});
// Covers the webhook audit repository (usage aggregation, per-user listing)
// and the retention purge (age-based and per-user cap).
describe("webhook audit + retention", () => {
  it("usageForUserSince aggregates per webhook", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const s1 = sessions.create("user-a", "test");
    const s2 = sessions.create("user-b", "test");
    const now = Date.now();
    const fresh = new Date(now - 1_000).toISOString();
    const old = new Date(now - 100 * 86_400_000).toISOString();

    // Three runs inside the window (2 ok, 1 error), one far outside it,
    // plus one run that belongs to a different user.
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "success", createdAt: fresh });
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "success", createdAt: fresh });
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "error", createdAt: fresh });
    audit.create({ webhookId: "dns-flush", userId: "user-a", sessionId: s1.id, status: "success", createdAt: old });
    audit.create({ webhookId: "other-hook", userId: "user-b", sessionId: s2.id, status: "success", createdAt: fresh });

    const since = new Date(now - 7 * 86_400_000).toISOString();
    const usage = audit.usageForUserSince(since, "user-a");
    expect(usage["dns-flush"]?.runs).toBe(3);
    expect(usage["dns-flush"]?.successes).toBe(2);
    expect(usage["dns-flush"]?.successRate).toBeCloseTo(2 / 3);
    expect(usage["other-hook"]).toBeUndefined();
  });

  it("retention purge deletes old rows but keeps recent ones", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const s = sessions.create("user-a", "test");
    const now = Date.now();
    const fresh = new Date(now - 60_000).toISOString();
    const stale = new Date(now - 100 * 86_400_000).toISOString();
    audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: fresh });
    audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: stale });
    audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: stale });

    const report = runWebhookAuditPurge(db, { retentionDays: 30, maxPerUser: 0 });
    expect(report.deletedByAge).toBe(2);
    const remaining = db.prepare("SELECT COUNT(*) as n FROM webhook_runs").get() as { n: number };
    expect(remaining.n).toBe(1);
  });

  it("per-user cap keeps the most recent N", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const s = sessions.create("user-a", "test");
    const now = Date.now();
    for (let i = 0; i < 8; i++) {
      const ts = new Date(now - i * 1000).toISOString();
      audit.create({ webhookId: "w", userId: "user-a", sessionId: s.id, status: "success", createdAt: ts });
    }

    const report = runWebhookAuditPurge(db, { retentionDays: 0, maxPerUser: 3 });
    expect(report.deletedByCap).toBe(5);
    const remaining = db.prepare("SELECT COUNT(*) as n FROM webhook_runs").get() as { n: number };
    expect(remaining.n).toBe(3);
  });

  it("listForSession enforces user_id", () => {
    const sessions = createSessionRepository(db);
    const audit = createWebhookAuditRepository(db);
    const sa = sessions.create("user-a", "test");
    const sb = sessions.create("user-b", "test");
    audit.create({ webhookId: "w", userId: "user-a", sessionId: sa.id, status: "success" });
    audit.create({ webhookId: "w", userId: "user-b", sessionId: sb.id, status: "success" });

    expect(audit.listForSession("user-a", sa.id)).toHaveLength(1);
    expect(audit.listForSession("user-b", sb.id)).toHaveLength(1);
    expect(audit.listForSession("user-a", sa.id)[0]?.user_id).toBe("user-a");

    // The actual enforcement check: asking for another user's session must
    // return nothing. Without it the test passes even if user_id is ignored.
    expect(audit.listForSession("user-a", sb.id)).toEqual([]);
    expect(audit.listForSession("user-b", sa.id)).toEqual([]);
  });
});

View File

@@ -0,0 +1,118 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { createServer, type Server } from "node:http";
// Stub RAG backend shared by the tests below, plus the ephemeral port it listens on.
let server: Server;
let port = 0;

// Start a throwaway HTTP server that answers the two requests the client makes.
beforeEach(async () => {
  await new Promise<void>((ready) => {
    server = createServer((req, res) => {
      const replyJson = (payload: unknown): void => {
        res.writeHead(200, { "content-type": "application/json" });
        res.end(JSON.stringify(payload));
      };

      const requestUrl = new URL(req.url ?? "/", `http://${req.headers.host}`);
      // Decode the path so ids with reserved characters (e.g. "runbooks:vpn")
      // match whether the client encoded the colon as %3A or not.
      const route = `${req.method} ${decodeURIComponent(requestUrl.pathname)}`;

      if (route === "POST /search") {
        // Read the request body to the end before answering.
        let received = "";
        req.on("data", (chunk) => (received += chunk));
        req.on("end", () => {
          replyJson({
            items: [
              { id: "remote:1", title: "Remote doc", source: "remote", tags: ["remote"], relevance: 0.9, excerpt: "x" },
            ],
          });
        });
        return;
      }

      if (route === "GET /docs/remote:1") {
        replyJson({
          id: "remote:1",
          title: "Remote doc",
          source: "remote",
          tags: ["remote"],
          headings: ["Section"],
          content: "Full remote content",
        });
        return;
      }

      res.writeHead(404).end();
    });

    // Port 0 lets the OS choose a free port; remember it for the tests.
    server.listen(0, "127.0.0.1", () => {
      const bound = server.address();
      port = typeof bound === "object" && bound ? bound.port : 0;
      ready();
    });
  });
});
// Shut the stub server down and wait for the close callback before the next test.
afterEach(
  () =>
    new Promise<void>((closed) => {
      server.close(() => closed());
    }),
);
// Exercises the remote RAG client against the stub server started in beforeEach.
describe("rag client", () => {
  // Config pointing at the stub server; only the auth token differs between tests.
  const remoteConfig = (authToken: string) => ({
    endpoint: `http://127.0.0.1:${port}`,
    authToken,
    timeoutMs: 5000,
    fallbackToLocal: false,
    chunkStrategy: "heading" as const,
    chunkSizeChars: 1500,
    topK: 5,
    minRelevance: 0,
    includeTags: [],
    excludeTags: [],
  });

  it("searches via the configured endpoint when set", async () => {
    const { searchViaRag } = await import("../src/rag/client.js");
    const hits = await searchViaRag(remoteConfig(""), "anything", 3);

    expect(hits).toHaveLength(1);
    const [first] = hits;
    expect(first?.id).toBe("remote:1");
    expect(first?.relevance).toBe(0.9);
  });

  it("fetches a single doc via the endpoint", async () => {
    const { getViaRag } = await import("../src/rag/client.js");
    const fetched = await getViaRag(remoteConfig("secret"), "remote:1");

    expect(fetched?.id).toBe("remote:1");
    expect(fetched?.content).toBe("Full remote content");
  });

  it("isRagRemote returns true when endpoint is set, false otherwise", async () => {
    const { isRagRemote } = await import("../src/rag/client.js");
    const configFor = (endpoint: string) => ({
      authToken: "",
      timeoutMs: 1000,
      fallbackToLocal: true,
      chunkStrategy: "heading" as const,
      chunkSizeChars: 1500,
      topK: 5,
      minRelevance: 0,
      includeTags: [],
      excludeTags: [],
      endpoint,
    });

    expect(isRagRemote(configFor(""))).toBe(false);
    expect(isRagRemote(configFor("http://x"))).toBe(true);
  });
});

View File

@@ -0,0 +1,57 @@
import { describe, expect, it } from "vitest";
import { createRateLimiter } from "../src/rate-limit.js";
// Rate-limiter behaviour, driven with explicit timestamps so no real time has to pass.
describe("rate-limit", () => {
  it("accepts up to burst then rejects", () => {
    const limiter = createRateLimiter({ perMinute: 60, burst: 3 });
    for (const remaining of [2, 1, 0]) {
      expect(limiter.consume("u1", 0)).toEqual({ ok: true, remaining });
    }

    const rejected = limiter.consume("u1", 0);
    expect(rejected.ok).toBe(false);
    if (!rejected.ok) {
      expect(rejected.retryAfterMs).toBeGreaterThan(0);
      expect(rejected.retryAfterMs).toBeLessThanOrEqual(1000);
    }
  });

  it("refills tokens over time", () => {
    const limiter = createRateLimiter({ perMinute: 60, burst: 2 });
    const allowedAt = (nowMs: number) => limiter.consume("u1", nowMs).ok;

    expect(allowedAt(0)).toBe(true);
    expect(allowedAt(0)).toBe(true);
    expect(allowedAt(0)).toBe(false);
    // 1 second later, 1 token refilled
    expect(allowedAt(1000)).toBe(true);
    expect(allowedAt(1000)).toBe(false);
  });

  it("isolates buckets per id", () => {
    const limiter = createRateLimiter({ perMinute: 60, burst: 1 });
    // Each id has its own bucket: one request passes, the next is rejected.
    for (const id of ["u1", "u2"]) {
      expect(limiter.consume(id, 0).ok).toBe(true);
      expect(limiter.consume(id, 0).ok).toBe(false);
    }
  });

  it("caps refill at burst", () => {
    const limiter = createRateLimiter({ perMinute: 60, burst: 2 });
    // Wait a long time, tokens should still be capped at 2
    const afterLongIdle = limiter.consume("u1", 60_000);
    expect(afterLongIdle).toEqual({ ok: true, remaining: 1 });
    expect(limiter.consume("u1", 60_000).ok).toBe(true);
    expect(limiter.consume("u1", 60_000).ok).toBe(false);
  });

  it("reset clears a single bucket or all", () => {
    const limiter = createRateLimiter({ perMinute: 60, burst: 1 });
    limiter.consume("u1", 0);
    limiter.consume("u2", 0);
    expect(limiter.size()).toBe(2);

    limiter.reset("u1");
    expect(limiter.size()).toBe(1);
    expect(limiter.consume("u1", 0).ok).toBe(true);

    limiter.reset();
    expect(limiter.size()).toBe(0);
  });
});

8
apps/api/tsconfig.json Normal file
View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"]
}

28
apps/web/index.html Normal file
View File

@@ -0,0 +1,28 @@
<!doctype html>
<!-- The document language is declared as English to match the UI strings. -->
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="color-scheme" content="dark light" />
    <title>SIC — Super Incident Commander</title>
    <link id="favicon" rel="icon" type="image/png" href="/favicon-dark.png" />
    <script>
      // Apply persisted theme before paint so the favicon/logo match the saved preference.
      (function () {
        try {
          var stored = window.localStorage.getItem("supr.theme");
          // Anything other than an explicit "light" falls back to the dark theme.
          var theme = stored === "light" ? "light" : "dark";
          document.documentElement.dataset.theme = theme;
          var fav = document.getElementById("favicon");
          if (fav) fav.href = theme === "light" ? "/favicon-light.png" : "/favicon-dark.png";
        } catch (error) {
          /* localStorage unavailable, keep defaults */
        }
      })();
    </script>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>

25
apps/web/package.json Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "@pi-chat/web",
"private": true,
"version": "0.1.0",
"type": "module",
"scripts": {
"dev": "vite --host 0.0.0.0",
"typecheck": "tsc --noEmit",
"lint": "tsc --noEmit"
},
"dependencies": {
"@pi-chat/shared": "workspace:*",
"@vitejs/plugin-react": "^4.5.2",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-markdown": "^10.1.0",
"remark-gfm": "^4.0.1",
"vite": "^6.3.5"
},
"devDependencies": {
"@types/react": "^19.1.8",
"@types/react-dom": "^19.1.6",
"typescript": "^5.8.3"
}
}

BIN
apps/web/public/agent.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 832 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 892 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 701 KiB

85
apps/web/src/DocModal.tsx Normal file
View File

@@ -0,0 +1,85 @@
import { useEffect } from "react";
import { MarkdownView } from "./code-block";
// A knowledge-base document as shown by the document modal.
export type KnowledgeDoc = {
id: string;
title: string;
source: string;
tags: string[];
// Optional metadata; the modal renders these rows only when a value is present.
owner?: string;
updated?: string;
headings: string[];
// Document body; the modal passes it to MarkdownView.
content: string;
};
// Props accepted by DocModal.
type DocModalProps = {
doc: KnowledgeDoc;
// Invoked on Escape, on a backdrop click and on the close button.
onClose: () => void;
// Caller-supplied UI strings for the close button and the metadata row captions.
labels: {
close: string;
tags: string;
owner: string;
updated: string;
};
};
/**
 * Modal overlay that shows a single knowledge-base document: title, source,
 * optional metadata rows (tags / owner / updated) and the Markdown body.
 *
 * The modal closes on Escape, on a click on the backdrop, and on the close
 * button. Clicks inside the article are stopped so they don't reach the
 * backdrop handler.
 */
const DocModal = ({ doc, onClose, labels }: DocModalProps) => {
  useEffect(() => {
    const onKey = (event: KeyboardEvent) => {
      if (event.key === "Escape") onClose();
    };
    window.addEventListener("keydown", onKey);
    return () => window.removeEventListener("keydown", onKey);
  }, [onClose]);

  return (
    <div
      aria-label={doc.title}
      // Mark the dialog as modal so assistive technology treats the content
      // behind the backdrop as inert while it is open.
      aria-modal="true"
      className="docModalBackdrop"
      onClick={onClose}
      role="dialog"
    >
      <article className="docModal" onClick={(event) => event.stopPropagation()}>
        <header className="docModalHeader">
          <div>
            <h2>{doc.title}</h2>
            <small>{doc.source}</small>
          </div>
          {/* NOTE(review): the button has no text child; presumably its glyph comes from the iconToggle styles. Confirm. */}
          <button
            aria-label={labels.close}
            className="iconToggle"
            onClick={onClose}
            type="button"
          >
          </button>
        </header>
        <dl className="docMeta">
          {doc.tags.length > 0 ? (
            <div>
              <dt>{labels.tags}</dt>
              <dd>{doc.tags.join(", ")}</dd>
            </div>
          ) : null}
          {doc.owner ? (
            <div>
              <dt>{labels.owner}</dt>
              <dd>{doc.owner}</dd>
            </div>
          ) : null}
          {doc.updated ? (
            <div>
              <dt>{labels.updated}</dt>
              <dd>{doc.updated}</dd>
            </div>
          ) : null}
        </dl>
        <div className="docModalBody">
          <MarkdownView source={doc.content} />
        </div>
      </article>
    </div>
  );
};
export default DocModal;

View File

@@ -0,0 +1,54 @@
import { Component, type ReactNode } from "react";
// Props of ErrorBoundary: the subtree it renders while no error is stored.
type ErrorBoundaryProps = {
children: ReactNode;
};
// State of ErrorBoundary: the error being displayed, or null when there is none.
type ErrorBoundaryState = {
error: Error | null;
};
/**
 * Top-level React error boundary. Once a descendant throws during render, the
 * children are replaced by a fallback card that shows the error message and
 * offers two actions: clear the error and render the children again, or
 * reload the page.
 */
export class ErrorBoundary extends Component<ErrorBoundaryProps, ErrorBoundaryState> {
  state: ErrorBoundaryState = { error: null };

  /** Stores the thrown error so the next render shows the fallback. */
  static getDerivedStateFromError(error: Error): ErrorBoundaryState {
    return { error };
  }

  componentDidCatch(error: Error, info: { componentStack?: string }): void {
    // Surface to the dev console; production telemetry would hook in here.
    // eslint-disable-next-line no-console
    console.error("[SIC] uncaught render error", error, info.componentStack);
  }

  /** Full page reload. */
  private reloadPage = () => {
    window.location.reload();
  };

  /** Drops the stored error so the children are rendered again. */
  private clearError = () => {
    this.setState({ error: null });
  };

  render() {
    const failure = this.state.error;
    if (failure === null) {
      return this.props.children;
    }

    return (
      <div className="errorBoundary" role="alert">
        <div className="errorBoundaryCard">
          <strong className="panelHeading">Unrecoverable UI error</strong>
          <p>Something went wrong while rendering SIC. Your sessions and messages are still saved on the server.</p>
          <pre className="errorBoundaryMessage">{failure.message}</pre>
          <div className="errorBoundaryActions">
            <button onClick={this.clearError} type="button">
              Try again
            </button>
            <button className="primaryAction" onClick={this.reloadPage} type="button">
              Reload page
            </button>
          </div>
        </div>
      </div>
    );
  }
}

View File

@@ -0,0 +1,231 @@
import { useEffect, useMemo, useState } from "react";
import type { FormEvent } from "react";
import { authorizedHeaders, authTokenFromStorage, jsonHeaders, api } from "./api";
// Shape of one item in the `items` array returned by GET /api/webhooks, as consumed by this form.
type PublicWebhook = {
id: string;
label: string;
description?: string;
method: string;
required_roles: string[];
// When true, the form asks for confirmation via window.confirm before running.
confirmation_required: boolean;
};
// State of the run request: nothing sent yet, in flight, succeeded, or failed.
type RunResult =
| { kind: "idle" }
| { kind: "running" }
| { kind: "success"; responseStatus: number | null; runId: string }
| { kind: "error"; message: string };
// Props of the webhook form tab.
type WebhookFormTabProps = {
webhookId: string;
sessionId: string;
// Invoked by the "Back to chat" buttons.
onBack: () => void;
};
// English UI strings used by the webhook form tab.
const labelsEn = {
title: "Run webhook",
description: "Description",
requiredRoles: "Required roles",
confirmation: "Requires confirmation",
method: "Method",
payload: "Payload (optional JSON)",
payloadHelp: "These fields are merged with the backend template. Available variables: {user}, {session}, {message}.",
run: "Run",
running: "Running...",
resultOk: "Webhook executed",
resultErr: "Failed to execute",
httpStatus: "HTTP",
runId: "Audit ID",
back: "Back to chat",
// Shown both when the id is missing from the list and when loading the list fails.
notFound: "Webhook not found or insufficient permissions",
loading: "Loading webhook...",
user: "User",
session: "Session",
};
// UI language selector; English is the only language returned.
const detectLanguage = (): "en" => {
  return "en";
};
/**
 * Standalone form for running a single webhook against a chat session.
 *
 * On mount (and whenever `webhookId` changes) it loads the webhook list and
 * picks the requested entry. Submitting validates the optional JSON payload,
 * asks for confirmation when the webhook requires it, and POSTs to
 * `/api/webhooks/:id/run`. The outcome is rendered below the form.
 */
const WebhookFormTabInner = ({ webhookId, sessionId, onBack }: WebhookFormTabProps) => {
  const [labels] = useState(() => labelsEn);
  const [webhook, setWebhook] = useState<PublicWebhook | null>(null);
  const [payload, setPayload] = useState("{}");
  const [result, setResult] = useState<RunResult>({ kind: "idle" });
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Guards against setting state after unmount, and against a slow response
    // for a previous webhookId overwriting the current one.
    let cancelled = false;
    // Return to the loading state when the requested webhook changes.
    setWebhook(null);
    setError(null);
    void (async () => {
      try {
        const data = await api<{ items: PublicWebhook[] }>("/api/webhooks");
        if (cancelled) return;
        const found = data.items.find((item) => item.id === webhookId);
        if (!found) {
          setError(labels.notFound);
          return;
        }
        setWebhook(found);
      } catch (err) {
        if (cancelled) return;
        console.error(err);
        setError(labels.notFound);
      }
    })();
    return () => {
      cancelled = true;
    };
  }, [webhookId, labels.notFound]);

  const submit = async (event: FormEvent) => {
    event.preventDefault();
    if (!webhook) return;
    if (webhook.confirmation_required) {
      const ok = window.confirm(`Run ${webhook.label}?`);
      if (!ok) return;
    }

    let parsed: Record<string, unknown> = {};
    if (payload.trim().length > 0) {
      let value: unknown;
      try {
        value = JSON.parse(payload);
      } catch {
        setResult({ kind: "error", message: "Payload is not valid JSON" });
        return;
      }
      // `null` is treated like an empty payload. Any other non-object value
      // (array, number, string, boolean) is rejected rather than silently
      // dropped, so the user is never told a run used data it did not.
      if (value !== null) {
        if (typeof value !== "object" || Array.isArray(value)) {
          setResult({ kind: "error", message: "Payload must be a JSON object" });
          return;
        }
        parsed = value as Record<string, unknown>;
      }
    }

    setResult({ kind: "running" });
    try {
      // Encode the id: characters such as "/", "?" or "#" would otherwise
      // change which route the request hits.
      const response = await fetch(`/api/webhooks/${encodeURIComponent(webhook.id)}/run`, {
        method: "POST",
        headers: jsonHeaders(),
        body: JSON.stringify({
          sessionId,
          confirmed: true,
          lastUserMessage: undefined,
          payload: parsed,
        }),
      });
      if (!response.ok) {
        const detail = await response.text().catch(() => "");
        throw new Error(`http_${response.status}: ${detail.slice(0, 200)}`);
      }
      const body = (await response.json()) as { id: string; response_status: number | null };
      setResult({ kind: "success", responseStatus: body.response_status, runId: body.id });
    } catch (err) {
      console.error(err);
      const message = err instanceof Error ? err.message : "error";
      setResult({ kind: "error", message });
    }
  };

  // Short prefix of the stored auth token, shown in the "User" row. With no
  // token a neutral dash is shown; the previous fallback reused the
  // "webhook not found" message, which is unrelated to this field.
  const tokenInfo = useMemo(() => {
    const t = authTokenFromStorage();
    return t ? t.slice(0, 12) : "—";
  }, []);

  if (error) {
    return (
      <main className="formTab error">
        <h1>{labels.title}</h1>
        <p className="muted">{error}</p>
        <button type="button" onClick={onBack}>{labels.back}</button>
      </main>
    );
  }

  if (!webhook) {
    return (
      <main className="formTab loading">
        <h1>{labels.title}</h1>
        <p className="muted">{labels.loading}</p>
      </main>
    );
  }

  return (
    <main className="formTab">
      <header className="formTabHeader">
        <div>
          <small>SIC</small>
          <h1>{webhook.label}</h1>
        </div>
        <button type="button" onClick={onBack}>{labels.back}</button>
      </header>
      <dl className="formTabMeta">
        {webhook.description ? (
          <div>
            <dt>{labels.description}</dt>
            <dd>{webhook.description}</dd>
          </div>
        ) : null}
        <div>
          <dt>{labels.method}</dt>
          <dd><code>{webhook.method}</code></dd>
        </div>
        <div>
          <dt>{labels.requiredRoles}</dt>
          <dd>{webhook.required_roles.join(", ")}</dd>
        </div>
        <div>
          <dt>{labels.confirmation}</dt>
          <dd>{webhook.confirmation_required ? "Yes" : "No"}</dd>
        </div>
        <div>
          <dt>{labels.session}</dt>
          <dd><code>{sessionId.slice(0, 8)}</code></dd>
        </div>
        <div>
          <dt>{labels.user}</dt>
          <dd><code>{tokenInfo}</code></dd>
        </div>
      </dl>
      <form onSubmit={submit} className="formTabForm">
        <label>
          <span>{labels.payload}</span>
          <textarea
            onChange={(e) => setPayload(e.target.value)}
            rows={8}
            spellCheck={false}
            value={payload}
          />
        </label>
        <small className="muted">{labels.payloadHelp}</small>
        <button
          className="formTabRun"
          disabled={result.kind === "running"}
          type="submit"
        >
          {result.kind === "running" ? labels.running : labels.run}
        </button>
      </form>
      {result.kind === "success" ? (
        <section className="formTabResult success">
          <strong>{labels.resultOk}</strong>
          <small>{labels.httpStatus}: {result.responseStatus ?? "—"}</small>
          <small>{labels.runId}: <code>{result.runId}</code></small>
        </section>
      ) : null}
      {result.kind === "error" ? (
        <section className="formTabResult error">
          <strong>{labels.resultErr}</strong>
          <small>{result.message}</small>
        </section>
      ) : null}
    </main>
  );
};
/**
 * Reads the `webhook` and `session` query parameters from the current URL.
 *
 * @returns `{ webhookId, sessionId }` when both parameters are present and
 *          non-empty, otherwise `null`.
 */
export const getWebhookFormTabParams = () => {
  const query = new URLSearchParams(window.location.search);
  const webhookId = query.get("webhook");
  const sessionId = query.get("session");
  return webhookId && sessionId ? { webhookId, sessionId } : null;
};
const WebhookFormTab = WebhookFormTabInner;
export default WebhookFormTab;
export type { WebhookFormTabProps };

93
apps/web/src/api.ts Normal file
View File

@@ -0,0 +1,93 @@
// localStorage key under which the auth token is kept.
export const AUTH_TOKEN_STORAGE_KEY = "pi-chat.authToken";

/**
 * Resolves the auth token to use: the trimmed value from localStorage when
 * non-empty, otherwise the trimmed `VITE_AUTH_TOKEN` value, otherwise null.
 */
export const authTokenFromStorage = () => {
  const fromStorage = window.localStorage.getItem(AUTH_TOKEN_STORAGE_KEY)?.trim();
  const fromBuildEnv = import.meta.env.VITE_AUTH_TOKEN?.trim();
  if (fromStorage) return fromStorage;
  if (fromBuildEnv) return fromBuildEnv;
  return null;
};
/**
 * Copies `headers` and adds an Authorization header when a token is available
 * and the caller did not supply one. A token that already starts with
 * "Bearer " (any casing) is used verbatim; otherwise the prefix is added.
 *
 * Content-type is deliberately left alone: Fastify answers 400 to a request
 * that declares `content-type: application/json` but carries an empty body
 * (typical for DELETE/PATCH), so body-less callers must not get it.
 */
export const authorizedHeaders = (headers?: HeadersInit) => {
  const merged = new Headers(headers);
  const token = authTokenFromStorage();
  if (!token || merged.has("authorization")) {
    return merged;
  }
  const alreadyPrefixed = token.toLowerCase().startsWith("bearer ");
  merged.set("authorization", alreadyPrefixed ? token : `Bearer ${token}`);
  return merged;
};
/**
 * Headers for requests that carry a JSON body: everything `authorizedHeaders`
 * provides, plus `content-type: application/json` unless the caller already
 * set a content-type.
 */
export const jsonHeaders = (headers?: HeadersInit) => {
  const merged = authorizedHeaders(headers);
  if (merged.has("content-type")) {
    return merged;
  }
  merged.set("content-type", "application/json");
  return merged;
};
/**
 * Thin typed wrapper around `fetch` for the backend API.
 *
 * @param path Request URL.
 * @param init Optional fetch options; its headers are merged with the auth header.
 * @returns The parsed JSON body, or `undefined` for 204 / empty responses.
 * @throws Error with message `api_error:<status>` when the response is not ok.
 */
export const api = async <T,>(path: string, init?: RequestInit): Promise<T> => {
  // Content-type is only declared when a body is actually sent. Fastify
  // rejects body-less requests that claim application/json with a 400 ("Body
  // cannot be empty when content-type is set to 'application/json'"), so
  // DELETE / PATCH / GET without a body must go out without it.
  const sendsBody = init?.body !== undefined && init?.body !== null;
  const buildHeaders = sendsBody ? jsonHeaders : authorizedHeaders;

  const response = await fetch(path, { ...init, headers: buildHeaders(init?.headers) });
  if (!response.ok) {
    throw new Error(`api_error:${response.status}`);
  }

  // 204 No Content has nothing to parse.
  if (response.status === 204) {
    return undefined as T;
  }

  // Some servers answer 200 with an empty body; don't feed that to JSON.parse.
  const raw = await response.text();
  return raw.length === 0 ? (undefined as T) : (JSON.parse(raw) as T);
};
/**
 * Parses the JSON metadata string stored with a chat message.
 *
 * @param metadata Raw JSON text, or null when the message has no metadata.
 * @returns The parsed metadata object, or null when the input is empty, is
 *          not valid JSON, or is valid JSON that is not a plain object
 *          (number, string, boolean, array, null).
 */
export const parseMetadata = (metadata: string | null) => {
  if (!metadata) return null;
  let parsed: unknown;
  try {
    parsed = JSON.parse(metadata);
  } catch {
    return null;
  }
  // JSON.parse accepts any JSON value. Only a non-array object can match the
  // declared shape; anything else would be a value lying about its type.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as {
    docs?: unknown[];
    actions?: Array<{ id: string }>;
    model?: string;
    usage?: {
      promptTokens?: number;
      completionTokens?: number;
      totalTokens?: number;
      cachedTokens?: number;
      durationMs?: number;
    };
  };
};
/**
 * Formats a duration given in milliseconds for display:
 * under one second as "N ms", under one minute as "S.S s",
 * otherwise as "Mm Ss" with whole minutes and whole seconds.
 */
export const formatDuration = (ms: number): string => {
  const msPerSecond = 1000;
  const msPerMinute = 60_000;

  if (ms < msPerSecond) {
    return `${ms} ms`;
  }
  if (ms < msPerMinute) {
    const seconds = ms / msPerSecond;
    return `${seconds.toFixed(1)} s`;
  }
  const wholeMinutes = Math.floor(ms / msPerMinute);
  const leftoverSeconds = Math.floor((ms % msPerMinute) / msPerSecond);
  return `${wholeMinutes}m ${leftoverSeconds}s`;
};
/**
 * Compact number formatting: values of 1000 and above are shown in thousands
 * with a "k" suffix (one decimal below 10 000, none from 10 000 up); smaller
 * values are printed as-is.
 */
export const formatNumber = (n: number): string => {
  const inThousands = n >= 1000;
  if (!inThousands) {
    return String(n);
  }
  const decimals = n >= 10_000 ? 0 : 1;
  return `${(n / 1000).toFixed(decimals)}k`;
};
/**
 * Produces a client-side id: `crypto.randomUUID()` when the browser exposes
 * it, otherwise a "tmp-<timestamp>-<random>" string.
 */
export const temporaryId = () => {
  // Bind so the method keeps its receiver when called on its own.
  const nativeUuid = window.crypto?.randomUUID?.bind(window.crypto);
  return nativeUuid
    ? nativeUuid()
    : `tmp-${Date.now()}-${Math.random().toString(36).slice(2)}`;
};
/**
 * Formats a score with two decimals; any value that is not a number yields
 * the placeholder "s/d".
 */
export const formatScore = (value: unknown) => {
  if (typeof value !== "number") {
    return "s/d";
  }
  return value.toFixed(2);
};

267
apps/web/src/code-block.tsx Normal file
View File

@@ -0,0 +1,267 @@
import { useEffect, useRef, useState } from "react";
import type { ReactNode } from "react";
import Markdown from "react-markdown";
import remarkGfm from "remark-gfm";
// Inline 14x14 SVG icons. Each is marked aria-hidden and strokes with
// `currentColor`.

// "Copy" icon.
const copyIcon = (
<svg
aria-hidden="true"
viewBox="0 0 24 24"
width="14"
height="14"
fill="none"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
>
<rect x="9" y="9" width="11" height="11" rx="2" />
<path d="M5 15V5a2 2 0 0 1 2-2h10" />
</svg>
);
// "Download" icon.
const downloadIcon = (
<svg
aria-hidden="true"
viewBox="0 0 24 24"
width="14"
height="14"
fill="none"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
>
<path d="M12 4v12" />
<path d="M6 12l6 6 6-6" />
<path d="M4 20h16" />
</svg>
);
// Check-mark icon, rendered by CodeBlock in place of the copy icon while `copied` is true.
const checkIcon = (
<svg
aria-hidden="true"
viewBox="0 0 24 24"
width="14"
height="14"
fill="none"
stroke="currentColor"
strokeWidth="2.4"
strokeLinecap="round"
strokeLinejoin="round"
>
<path d="M5 12.5l4.5 4.5L19 7" />
</svg>
);
/**
 * Copies `text` to the clipboard.
 *
 * Tries the async Clipboard API first; if that throws, falls back to a hidden
 * textarea plus `document.execCommand("copy")`.
 *
 * @returns true when the text was copied, false for empty input or when both
 *          strategies fail.
 */
const writeToClipboard = async (text: string): Promise<boolean> => {
  if (!text) return false;

  try {
    await navigator.clipboard.writeText(text);
    return true;
  } catch {
    // Fallback for non-secure contexts: continue with the textarea approach below.
  }

  const scratch = document.createElement("textarea");
  scratch.value = text;
  scratch.style.position = "fixed";
  scratch.style.opacity = "0";
  document.body.appendChild(scratch);
  scratch.select();

  let succeeded = false;
  try {
    succeeded = document.execCommand("copy");
  } catch {
    succeeded = false;
  } finally {
    // Always remove the helper element, whatever the outcome.
    document.body.removeChild(scratch);
  }
  return succeeded;
};
/**
 * Wraps a rendered `<pre>` with a copy-to-clipboard button. After a
 * successful copy the button shows a check mark for 1.5 seconds.
 */
const CodeBlock = ({ children }: { children?: ReactNode }) => {
  const [copied, setCopied] = useState(false);
  const codeRef = useRef<HTMLElement | null>(null);
  const timerRef = useRef<number | null>(null);

  // Cancel a pending "copied" reset when the component unmounts.
  useEffect(() => {
    return () => {
      if (timerRef.current !== null) {
        window.clearTimeout(timerRef.current);
      }
    };
  }, []);

  const handleCopy = async () => {
    const text = codeRef.current?.innerText ?? "";
    const ok = await writeToClipboard(text);
    // Only confirm when the copy really happened; showing "Copied" after a
    // failed write would leave the user pasting stale clipboard contents.
    if (!ok) return;
    setCopied(true);
    if (timerRef.current !== null) {
      window.clearTimeout(timerRef.current);
    }
    timerRef.current = window.setTimeout(() => setCopied(false), 1500);
  };

  // The ref is attached to <pre>; the text to copy lives in its <code> child.
  const captureCodeRef = (element: HTMLElement | null) => {
    codeRef.current = element?.querySelector("code") ?? null;
  };

  return (
    <div className="codeBlock">
      <button
        aria-label={copied ? "Copied" : "Copy code"}
        className={`codeCopy${copied ? " copied" : ""}`}
        onClick={handleCopy}
        title={copied ? "Copied" : "Copy code"}
        type="button"
      >
        {copied ? checkIcon : copyIcon}
      </button>
      <pre ref={captureCodeRef}>{children}</pre>
    </div>
  );
};
// One table cell: its whitespace-normalized text and whether it is a <th>.
type TableCell = { text: string; isHeader: boolean };

/**
 * Extracts a rendered HTML table into plain string data.
 *
 * The first row is used as the header row when it is non-empty and consists
 * solely of `<th>` cells; otherwise generic "Column N" headers are generated
 * and every row counts as data.
 */
const readTable = (table: HTMLTableElement): { headers: string[]; rows: string[][] } => {
  const matrix: TableCell[][] = [];
  for (const row of Array.from(table.querySelectorAll("tr"))) {
    const cells: TableCell[] = [];
    for (const cell of Array.from(row.querySelectorAll("th,td"))) {
      cells.push({
        // Collapse runs of whitespace (including newlines) into single spaces.
        text: (cell.textContent ?? "").replace(/\s+/g, " ").trim(),
        isHeader: cell.tagName.toLowerCase() === "th",
      });
    }
    matrix.push(cells);
  }

  const [firstRow, ...remainingRows] = matrix;
  if (firstRow === undefined) {
    return { headers: [], rows: [] };
  }

  const startsWithHeader = firstRow.length > 0 && firstRow.every((cell) => cell.isHeader);
  const headers = startsWithHeader
    ? firstRow.map((cell) => cell.text)
    : firstRow.map((_, index) => `Column ${index + 1}`);
  const bodyRows = startsWithHeader ? remainingRows : matrix;

  return {
    headers,
    rows: bodyRows.map((row) => row.map((cell) => cell.text)),
  };
};
/**
 * Serializes a header row plus data rows as tab-separated values.
 *
 * TSV has no quoting mechanism, so any character that would break the grid is
 * replaced by a single space: a tab, a line feed, a carriage return, or a
 * CRLF pair (which counts as one line break and therefore one space).
 *
 * @param headers Column titles, emitted as the first line.
 * @param rows Data rows, one output line each.
 * @returns Lines joined with `\n`, fields joined with `\t`.
 */
const toTsv = (headers: string[], rows: string[][]): string => {
  // A bare "\r" must be neutralised too: many consumers treat it as a record
  // separator, so leaving it in would split one row into two.
  const escape = (value: string) => value.replace(/\r\n|[\t\n\r]/g, " ");
  return [headers, ...rows].map((row) => row.map(escape).join("\t")).join("\n");
};
/**
 * Serializes a header row plus data rows as CSV following RFC 4180.
 *
 * A field containing a comma, a double quote, or a line break is wrapped in
 * double quotes, with embedded double quotes doubled. Other fields are emitted
 * verbatim. Records are separated by CRLF.
 *
 * @param headers Column titles, emitted as the first record.
 * @param rows Data rows, one record each.
 * @returns The CSV document as a single string (no trailing line break).
 */
const toCsv = (headers: string[], rows: string[][]): string => {
  const quoteIfNeeded = (field: string) =>
    /[",\n\r]/.test(field) ? `"${field.replace(/"/g, '""')}"` : field;

  const records: string[] = [];
  for (const record of [headers, ...rows]) {
    records.push(record.map((field) => quoteIfNeeded(field)).join(","));
  }
  return records.join("\r\n");
};
/**
 * Triggers a browser download of in-memory text.
 *
 * The content is wrapped in a Blob, exposed through an object URL, and
 * "clicked" via a temporary anchor element that is attached to and then
 * removed from `document.body`.
 *
 * @param filename Name suggested to the browser for the saved file.
 * @param content Text to save.
 * @param mime MIME type assigned to the Blob.
 */
const downloadFile = (filename: string, content: string, mime: string) => {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: mime }));

  const anchor = document.createElement("a");
  Object.assign(anchor, { href: objectUrl, download: filename, rel: "noreferrer" });

  const host = document.body;
  host.appendChild(anchor);
  anchor.click();
  host.removeChild(anchor);

  // Release the object URL on the next tick so the download can start first.
  window.setTimeout(() => {
    URL.revokeObjectURL(objectUrl);
  }, 0);
};
/**
 * Renders a markdown table inside a scroll container with two actions:
 * download the table as CSV, and copy it to the clipboard as TSV. After a
 * copy the button shows a "Copied" state for 1.5 seconds.
 */
const TableBlock = ({ children }: { children?: ReactNode }) => {
  const [copied, setCopied] = useState(false);
  const tableRef = useRef<HTMLTableElement | null>(null);
  const timerRef = useRef<number | null>(null);

  // Cancel a still-pending "revert to idle" timer when the component unmounts.
  useEffect(
    () => () => {
      const pending = timerRef.current;
      if (pending !== null) window.clearTimeout(pending);
    },
    [],
  );

  // Show the "Copied" state and (re)start the timer that reverts it.
  const flashCopied = () => {
    setCopied(true);
    const pending = timerRef.current;
    if (pending !== null) window.clearTimeout(pending);
    timerRef.current = window.setTimeout(() => setCopied(false), 1500);
  };

  const handleCopy = async () => {
    const table = tableRef.current;
    if (!table) return;
    const data = readTable(table);
    await writeToClipboard(toTsv(data.headers, data.rows));
    flashCopied();
  };

  const handleDownloadCsv = () => {
    const table = tableRef.current;
    if (!table) return;
    const data = readTable(table);
    // ISO timestamp with ":" and "." replaced so it is safe in a file name.
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    // Leading UTF-8 BOM so Excel opens the file correctly.
    const payload = `\uFEFF${toCsv(data.headers, data.rows)}`;
    downloadFile(`table-${stamp}.csv`, payload, "text/csv;charset=utf-8");
  };

  const copyLabel = copied ? "Copied" : "Copy table as TSV";
  const copyTitle = copied ? "Copied" : "Copy as TSV";
  const copyClass = copied ? "tableCopy copied" : "tableCopy";

  return (
    <div className="tableBlock">
      <div className="tableBlockActions">
        <button
          aria-label="Download table as CSV"
          className="tableDownload"
          onClick={handleDownloadCsv}
          title="Download as CSV"
          type="button"
        >
          {downloadIcon}
        </button>
        <button
          aria-label={copyLabel}
          className={copyClass}
          onClick={handleCopy}
          title={copyTitle}
          type="button"
        >
          {copied ? checkIcon : copyIcon}
        </button>
      </div>
      <div className="tableScroll">
        <table ref={tableRef}>{children}</table>
      </div>
    </div>
  );
};
/**
 * Turns literal two-character escape sequences into the characters they name:
 * a backslash followed by `n` becomes a line feed, and a backslash followed by
 * `t` becomes a tab. All other text is returned unchanged.
 *
 * @param value Markdown source that may contain literal `\n` / `\t` sequences.
 * @returns The source with those sequences replaced by real control characters.
 */
export const normalizeMarkdown = (value: string) => {
  const withLineBreaks = value.split("\\n").join("\n");
  return withLineBreaks.split("\\t").join("\t");
};
/**
 * Element overrides handed to the markdown renderer:
 * - `a`: links open in a new tab with `rel="noreferrer"`.
 * - `pre`: code blocks are wrapped in {@link CodeBlock}.
 * - `table`: tables are wrapped in {@link TableBlock}.
 */
export const markdownComponents = {
  a: (props: { href?: string; children?: ReactNode }) => {
    const { href, children } = props;
    return (
      <a href={href} rel="noreferrer" target="_blank">
        {children}
      </a>
    );
  },
  pre: (props: { children?: ReactNode }) => {
    return <CodeBlock>{props.children}</CodeBlock>;
  },
  table: (props: { children?: ReactNode }) => {
    return <TableBlock>{props.children}</TableBlock>;
  },
};
/**
 * Renders a markdown string with GFM support, the custom element overrides in
 * `markdownComponents`, and raw HTML skipped. The source is first passed
 * through `normalizeMarkdown`.
 */
export const MarkdownView = ({ source }: { source: string }) => {
  const markdown = normalizeMarkdown(source);
  return (
    <Markdown components={markdownComponents} remarkPlugins={[remarkGfm]} skipHtml>
      {markdown}
    </Markdown>
  );
};
export { CodeBlock };

1710
apps/web/src/main.tsx Normal file

File diff suppressed because it is too large Load Diff

1184
apps/web/src/styles.css Normal file

File diff suppressed because it is too large Load Diff

1
apps/web/src/vite-env.d.ts vendored Normal file
View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

8
apps/web/tsconfig.json Normal file
View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"jsx": "react-jsx",
"outDir": "dist"
},
"include": ["src", "vite.config.ts"]
}

14
apps/web/vite.config.ts Normal file
View File

@@ -0,0 +1,14 @@
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
// Dev-server port: WEB_PORT when it parses to a valid TCP port, otherwise 3000.
// `??` alone is not enough here: an empty-string WEB_PORT (e.g. `WEB_PORT=` in
// a .env file) is not nullish, and Number("") is 0, which would make the
// server listen on a random port; a non-numeric value would yield NaN.
const DEFAULT_WEB_PORT = 3000;
const requestedWebPort = Number(process.env.WEB_PORT);
const webPort =
  Number.isInteger(requestedWebPort) && requestedWebPort > 0 && requestedWebPort <= 65535
    ? requestedWebPort
    : DEFAULT_WEB_PORT;

export default defineConfig({
  plugins: [react()],
  server: {
    port: webPort,
    // Forward API and health-check routes to the backend during development.
    proxy: {
      "/api": "http://localhost:8787",
      "/healthz": "http://localhost:8787",
      "/readyz": "http://localhost:8787",
    },
  },
});

5
config/docs.yml Normal file
View File

@@ -0,0 +1,5 @@
sources:
- id: knowledge
path: ./knowledge
include:
- "**/*.md"

75
config/mcp.yml Normal file
View File

@@ -0,0 +1,75 @@
# MCP (Model Context Protocol) tool catalog.
#
# Phase 1: this is a DECLARATIVE catalog of available tools. The SIC backend
# does NOT call any MCP server directly. The LLM sees these tools in its
# context (so it can reason about what's possible) and the right panel
# renders them as recommendations that the user can inspect. Execution is
# reserved for the future MCP runtime, which will run a per-request MCP
# client; for now every tool here is "inspect only".
#
# Each tool:
# id stable identifier
# name human-readable label
# description what the tool does (sent to the LLM verbatim)
# server optional reference to a configured MCP server (Phase 2+)
# parameters JSON Schema for the tool arguments
# tags free-form tags for filtering
# enabled whether the tool is exposed at all
mcp_servers: []
mcp_tools:
- id: kb.search
name: Search knowledge base
description: |
Search the internal knowledge base (runbooks, SOPs, architecture docs) for
documents relevant to the query. Returns matching document ids and
relevance scores. Does NOT execute anything.
server: null
parameters:
type: object
required: [query]
properties:
query:
type: string
description: Natural-language search query.
limit:
type: integer
minimum: 1
maximum: 20
description: Maximum results to return. Defaults to 5.
tags: [search, knowledge, docs]
enabled: true
- id: sessions.list
name: List recent chat sessions
description: |
List the calling user's most recent chat sessions, ordered by updated_at
descending. Does NOT execute anything; returns metadata only.
server: null
parameters:
type: object
required: []
properties:
limit:
type: integer
minimum: 1
maximum: 50
description: Maximum sessions to return. Defaults to 10.
tags: [sessions, listing]
enabled: true
- id: webhooks.usage
name: Webhook usage stats
description: |
Return per-webhook usage stats for the calling user over a recent time
window (defaults to 7 days). Read-only.
server: null
parameters:
type: object
required: []
properties:
days:
type: integer
minimum: 1
maximum: 90
description: Window in days. Defaults to 7.
tags: [webhooks, audit, read-only]
enabled: true

29
config/models.yml Normal file
View File

@@ -0,0 +1,29 @@
models:
- id: fast
label: MiniMax Fast
provider: openai-compatible
base_url: ${LLM_BASE_URL}
model: MiniMax-M2.7-highspeed
max_tokens: 1024
- id: balanced
label: MiniMax Balanced
provider: openai-compatible
base_url: ${LLM_BASE_URL}
model: MiniMax-M2.7
max_tokens: 2048
# If MiniMax fails (5xx, 429, timeout, network) fall through to mr-auto.
fallback:
- mr-auto
- id: reasoning
label: MiniMax Reasoning
provider: openai-compatible
base_url: ${LLM_BASE_URL}
model: MiniMax-M3
max_tokens: 4096
- id: mr-auto
label: MR Auto (llm.rikrdo.com)
provider: openai-compatible
base_url: https://llm.rikrdo.com/v1
api_key_env: MR_AUTO_API_KEY
model: mr-auto
max_tokens: 8192

35
config/n8n-workflows.yml Normal file
View File

@@ -0,0 +1,35 @@
# n8n-style external workflow links.
#
# These are NOT executed by the SIC backend. Each entry is a deep link into
# an n8n (or similar) workflow runner that the user can open in a new tab.
# The LLM can recommend them as "external" suggestions; the right panel
# renders them with a distinct icon and the action only opens a new tab.
#
# Fields:
# id stable identifier (kebab-case recommended)
# label human-readable label
# description what the workflow does
# url absolute external URL (env vars supported)
# required_roles user roles allowed to see this link (optional, [] = any)
# tags free-form tags to help retrieval / filtering
n8n_workflows:
- id: n8n-vpn-restart
label: Restart VPN tunnel (n8n)
description: Opens the n8n workflow that restarts the VPN tunnel after credential rotation.
url: ${N8N_BASE_URL:?set N8N_BASE_URL}/workflow/vpn-restart
required_roles:
- webhook-runner
- sre
tags: [vpn, network, automation]
- id: n8n-incident-summary
label: Generate incident summary (n8n)
description: Opens the n8n workflow that drafts an incident summary from the current session.
url: ${N8N_BASE_URL:?set N8N_BASE_URL}/workflow/incident-summary
required_roles: []
tags: [incident, automation, reporting]
- id: n8n-postmortem
label: Open postmortem workflow (n8n)
description: Opens the postmortem workflow in n8n with the current session context.
url: ${N8N_BASE_URL:?set N8N_BASE_URL}/workflow/postmortem
required_roles: []
tags: [postmortem, automation]

42
config/rag.yml Normal file
View File

@@ -0,0 +1,42 @@
# Retrieval-Augmented Generation (RAG) configuration.
#
# SIC treats the knowledge base as an external service. The RAG service is
# expected to expose:
# POST {endpoint}/search
# body: { query, limit, min_relevance, include_tags, exclude_tags }
# returns: { items: [{ id, title, source, tags, relevance, excerpt, content? }] }
# GET {endpoint}/docs/:id
# returns: { id, title, source, tags, owner?, updated?, headings, content }
#
# For local dev (or when no endpoint is configured) the docs repository
# falls back to reading Markdown files from `knowledge/` and applying the
# token-overlap scoring in apps/api/src/docs/repository.ts.
#
# Fields:
# endpoint external RAG service base URL (no trailing slash).
# Leave empty to use the local fallback.
# auth_token optional bearer token sent in the Authorization header.
# timeout_ms HTTP request timeout. Default: 10000.
# fallback_to_local when true (default), use the local knowledge/ directory
# if the external endpoint fails. Set to false to fail
# closed.
# chunk_strategy how to split a Markdown doc into chunks (local mode only)
# - "heading" : split on H1/H2/H3, each chunk is a section
# - "paragraph": split on blank lines, each chunk is a paragraph block
# - "fixed" : split on a fixed character length (chunk_size_chars)
# chunk_size_chars only used by "fixed" strategy (local mode only)
# top_k max chunks returned per query
# min_relevance chunks with relevance below this are dropped
# include_tags optional global include filter
# exclude_tags optional global exclude filter
rag:
endpoint: ${RAG_ENDPOINT_URL:}
auth_token: ${RAG_AUTH_TOKEN:}
timeout_ms: 10000
fallback_to_local: true
chunk_strategy: heading
chunk_size_chars: 1500
top_k: 5
min_relevance: 0.0
include_tags: []
exclude_tags: []

43
config/skills.yml Normal file
View File

@@ -0,0 +1,43 @@
# Skills are prompt fragments injected into the LLM's system prompt.
# They are persona/behavior customizations, NOT capabilities: the model still
# only recommends actions and the backend executes them.
#
# Fields:
# id stable identifier (kebab-case recommended)
# name human-readable label
# description what the skill does (safe to expose via /api/skills)
# enabled whether the fragment is injected (true/false)
# prompt the system prompt fragment to inject
#
# All skills are loaded at API boot. Restart the API after editing this file.
skills:
- id: sre-on-call
name: SRE on-call mode
description: Respond as a senior SRE handling a production page.
enabled: true
prompt: |
You are responding as a senior SRE who is on-call. Be concise and operational.
Always reference the runbook ids from internal_docs when relevant. Prefer
concrete actions over abstract advice. When the user's intent is ambiguous,
ask one short clarifying question instead of guessing.
- id: blameless-postmortem
name: Blameless postmortem writer
description: Help write blameless postmortems using the standard template.
enabled: true
prompt: |
When the user asks for a postmortem or incident review, follow the
postmortem runbook template exactly. Use blameless language: focus on
systemic causes and contributing factors, never on individual blame.
The output must include: Summary, Timeline, Root cause, What went well,
What went wrong, Corrective actions, Lessons learned.
- id: security-incident
name: Security incident response
description: Guide containment and coordination for security incidents.
enabled: false
prompt: |
When the user describes a security incident, prioritize containment and
evidence preservation before root-cause analysis. Recommend involving the
Security IR team and the Communications Lead. Never suggest actions that
would destroy forensic evidence.

81
config/webhooks.yml Normal file
View File

@@ -0,0 +1,81 @@
webhooks:
- id: vpn-diagnostic
label: Run VPN diagnostic
description: Runs a diagnostic on the VPN server and returns latency and health metrics.
method: POST
url: ${WEBHOOK_VPN_DIAGNOSTIC_URL}
required_roles:
- webhook-runner
confirmation_required: true
payload_template:
source: pi-chat
user_id: "{{user.id}}"
session_id: "{{session.id}}"
last_user_message: "{{chat.last_user_message}}"
- id: service-restart
label: Restart service
description: Restarts a system service. The service name is read from the payload.
method: POST
url: ${WEBHOOK_SERVICE_RESTART_URL}
required_roles:
- webhook-runner
- sre
confirmation_required: true
payload_template:
source: pi-chat
user_id: "{{user.id}}"
session_id: "{{session.id}}"
service: "{{payload.service}}"
last_user_message: "{{chat.last_user_message}}"
- id: dns-flush
label: Flush local DNS
description: Clears the local DNS cache and returns the result.
method: POST
url: ${WEBHOOK_DNS_FLUSH_URL}
required_roles:
- webhook-runner
confirmation_required: true
payload_template:
source: pi-chat
user_id: "{{user.id}}"
session_id: "{{session.id}}"
last_user_message: "{{chat.last_user_message}}"
- id: disk-cleanup
label: Clean /tmp
description: Removes files in /tmp older than 7 days.
method: POST
url: ${WEBHOOK_DISK_CLEANUP_URL}
required_roles:
- webhook-runner
- sre
confirmation_required: true
payload_template:
source: pi-chat
user_id: "{{user.id}}"
session_id: "{{session.id}}"
last_user_message: "{{chat.last_user_message}}"
- id: log-tail
label: Tail service log
description: Returns the last N lines of a service log.
method: GET
url: ${WEBHOOK_LOG_TAIL_URL}
required_roles:
- webhook-runner
confirmation_required: false
payload_template:
service: "{{payload.service}}"
lines: "{{payload.lines}}"
- id: cache-purge
label: Purge CDN cache
description: Invalidates the CDN cache for the provided paths.
method: POST
url: ${WEBHOOK_CACHE_PURGE_URL}
required_roles:
- webhook-runner
confirmation_required: true
payload_template:
source: pi-chat
user_id: "{{user.id}}"
session_id: "{{session.id}}"
paths: "{{payload.paths}}"
last_user_message: "{{chat.last_user_message}}"

0
data/.gitkeep Normal file
View File

27
deploy/docker-compose.yml Normal file
View File

@@ -0,0 +1,27 @@
services:
api:
build:
context: ..
dockerfile: deploy/docker/api.Dockerfile
ports:
- "8787:8787"
volumes:
- ../data:/app/data
- ../config:/app/config
- ../knowledge:/app/knowledge
environment:
AUTH_MODE: local
DATABASE_URL: sqlite:///app/data/pi-chat.db
API_PORT: 8787
LLM_BASE_URL: https://api.minimax.io/v1
LLM_API_KEY: ${MINIMAX_API_KEY:?set MINIMAX_API_KEY}
DEFAULT_MODEL: fast
web:
build:
context: ..
dockerfile: deploy/docker/web.Dockerfile
ports:
- "3000:3000"
depends_on:
- api

View File

@@ -0,0 +1,9 @@
FROM node:22-alpine
WORKDIR /app
COPY package.json pnpm-workspace.yaml tsconfig.base.json ./
COPY apps/api ./apps/api
COPY packages ./packages
RUN corepack enable && pnpm install --frozen-lockfile=false
WORKDIR /app/apps/api
EXPOSE 8787
CMD ["pnpm", "dev"]

View File

@@ -0,0 +1,9 @@
FROM node:22-alpine
WORKDIR /app
COPY package.json pnpm-workspace.yaml tsconfig.base.json ./
COPY apps/web ./apps/web
COPY packages ./packages
RUN corepack enable && pnpm install --frozen-lockfile=false
WORKDIR /app/apps/web
EXPOSE 3000
CMD ["pnpm", "dev"]

17
docs/agents/api-agent.md Normal file
View File

@@ -0,0 +1,17 @@
# API Agent
Owns the Fastify backend.
## Focus
- Design HTTP/SSE contracts first.
- Persist every critical state in SQLite.
- Validate ownership with `session_id + user_id`.
- Emit JSON logs.
- Keep `/healthz` and `/readyz` simple.
## Do not
- Do not keep sessions in memory.
- Do not expose real webhook URLs to clients.
- Do not execute webhooks without explicit confirmation.

View File

@@ -0,0 +1,14 @@
# PI Adapter Agent
Owns isolating the `pi.dev` / LLM provider runtime.
## Focus
- Expose a stable contract to the backend.
- Support OpenAI-compatible providers.
- Return a structured response: `answer`, `recommended_actions`, `internal_docs`.
## Do not
- Do not mix backend HTTP rules with model logic.
- Do not let the model execute tools directly in Phase 1.

View File

@@ -0,0 +1,11 @@
# Security & Reliability Agent
Owns reviewing isolation, audit, and execution rules.
## Checklist
- Every message query filters by `session_id` AND `user_id`.
- Every webhook validates roles before being shown and before being executed.
- Every execution is recorded in `webhook_runs`.
- The frontend never receives real webhook URLs.
- No critical state lives only in memory.

15
docs/agents/web-agent.md Normal file
View File

@@ -0,0 +1,15 @@
# Web Agent
Owns the React + Vite UI.
## Focus
- Three-column layout: sessions, chat, right panel.
- Consume SSE from `/api/chat/stream`.
- Show recommended actions without auto-executing them.
- Rebuild state from the API, not from local memory as the source of truth.
## Do not
- Do not call webhooks directly from the browser.
- Do not store tokens or secrets in the frontend.

View File

@@ -0,0 +1,2 @@
=== PROMPT ===
=== RESPUESTA ===

View File

@@ -0,0 +1,11 @@
# Short definition
`SIC — Super Incident Commander` is a multi-session web interface for consulting a centralized `pi.dev` engine, with persistent history, simple search over internal documentation, and webhook recommendations that are only executed from the backend after explicit user confirmation.
## Target user
Small team, up to 5 concurrent users.
## Successful MVP
A user opens the UI, creates or resumes a session, asks a question, receives a streamed response, sees related documentation, gets recommended actions, and can execute a confirmed webhook. Everything is persisted and auditable.

57
docs/reliable-history.md Normal file
View File

@@ -0,0 +1,57 @@
# Reliable History
## Goal
Guarantee that the chat history is reconstructible, isolated by user, and consistent even if the backend restarts.
## Mandatory rules
1. Persist the user message before calling the LLM.
2. Persist the assistant response when the stream finishes.
3. If the LLM fails, record the failure in metadata or as a controlled error message.
4. Do not keep critical conversational state in memory.
5. All session and message queries must filter by `session_id` AND `user_id`.
6. Webhooks must be audited even when they fail.
## Base tables
```sql
CREATE TABLE IF NOT EXISTS chat_sessions (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
title TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS chat_messages (
id TEXT PRIMARY KEY,
session_id TEXT NOT NULL,
user_id TEXT NOT NULL,
role TEXT NOT NULL,
content TEXT NOT NULL,
metadata TEXT,
created_at TEXT NOT NULL,
FOREIGN KEY (session_id) REFERENCES chat_sessions(id)
);
CREATE TABLE IF NOT EXISTS webhook_runs (
id TEXT PRIMARY KEY,
webhook_id TEXT NOT NULL,
user_id TEXT NOT NULL,
session_id TEXT NOT NULL,
status TEXT NOT NULL,
request_payload TEXT,
response_status INTEGER,
created_at TEXT NOT NULL
);
```
## Security invariant
```sql
WHERE session_id = ?
AND user_id = ?
```
Without this filter, the query is incorrectly designed.

View File

@@ -0,0 +1,25 @@
---
title: Disk Cleanup Runbook
tags: [disk, cleanup, storage, operations]
owner: sre
updated: 2026-06-12
---
# Disk Cleanup Runbook
## When to use it
- `disk usage > 85%` alert on /tmp or /var.
- Job failures with `No space left on device`.
- Before scheduled node maintenance.
## Procedure
1. List candidate files: `find /tmp -type f -mtime +7`.
2. Confirm none are in use by an active process.
3. Run the `disk-cleanup` webhook to remove /tmp files older than 7 days.
4. Re-verify disk usage.
## Related webhooks
- disk-cleanup

View File

@@ -0,0 +1,24 @@
---
title: DNS Flush Runbook
tags: [dns, network, cache, troubleshooting]
owner: netops
updated: 2026-06-10
---
# DNS Flush Runbook
## Symptoms
- DNS resolutions return stale IPs.
- Users report that a site "works on some machines and not on others".
- Recent DNS changes are not propagating.
## Diagnosis
1. Check the local cache with `ipconfig /displaydns` or `resolvectl statistics`.
2. Confirm the upstream resolver is responding.
3. Run the `dns-flush` webhook on the affected machine.
## Related webhooks
- dns-flush

View File

@@ -0,0 +1,280 @@
---
title: Production Incident Response Runbook (long-form)
tags: [incident, production, sre, on-call, runbook, master]
owner: sre
updated: 2026-06-28
---
# Production Incident Response Runbook (long-form)
> This runbook is designed to exercise the UI: it contains nested headings, lists, tables, code blocks, blockquotes, links, and enough volume to force scroll in the modal. Use it as a reference during drills and to validate the look of the documentation viewer.
## Table of contents
1. [Purpose and scope](#purpose-and-scope)
2. [Severities and SLAs](#severities-and-slas)
3. [Roles and responsibilities](#roles-and-responsibilities)
4. [Response flow](#response-flow)
5. [Initial diagnosis](#initial-diagnosis)
6. [Common incident patterns](#common-incident-patterns)
7. [Useful commands](#useful-commands)
8. [Available webhooks](#available-webhooks)
9. [Escalation](#escalation)
10. [Post-mortem](#post-mortem)
11. [Appendix: glossary](#appendix-glossary)
## Purpose and scope
This runbook defines the standard procedure for responding to production incidents that affect the availability, integrity, or performance of critical services. It applies to every engineering and operations team that maintains services in scope of SIC.
### When to use this runbook
- Partial or total service outages.
- Severe performance degradation (p99 latency > agreed SLA).
- Confirmed or suspected data loss or corruption.
- Security alerts with production impact.
### When NOT to use this runbook
- Failures in dev or staging environments without user impact.
- Change requests or scheduled maintenance.
- HR or administrative process incidents.
## Severities and SLAs
| Severity | Definition | Ack SLA | Mitigation SLA | Communication |
| --- | --- | --- | --- | --- |
| **SEV-1** | Total outage or data loss | 5 minutes | 60 minutes | Every 15 min |
| **SEV-2** | Severe degradation, affects > 30% of users | 10 minutes | 2 hours | Every 30 min |
| **SEV-3** | Partial degradation, affects < 30% of users | 30 minutes | 8 hours | Every 2 hours |
| **SEV-4** | Cosmetic, no functional impact | 1 business day | Next sprint | Async |
> **Important**: severity can go up or down as the incident evolves. Document every change in the incident channel with a timestamp.
## Roles and responsibilities
- **Incident Commander (IC)**: coordinates the response, does not run technical tasks. The only person who can declare the incident resolved.
- **Comms Lead**: handles communication to stakeholders, status page, and customers.
- **Tech Lead**: leads the technical investigation, assigns tasks to the response team.
- **Subject Matter Expert (SME)**: provides system-specific knowledge for the affected service.
- **Scribe**: documents the incident timeline in real time.
## Response flow
1. **Detect**: alert, user report, or proactive monitoring.
2. **Triage**: classify severity and assign an IC in under 5 minutes.
3. **Convene**: open a bridge and the #inc-YYYYMMDD-XX channel.
4. **Mitigate**: apply changes to restore service. The root cause can wait.
5. **Resolve**: confirm the service is stable. Close the incident.
6. **Post-mortem**: within 5 business days, blameless.
### Flow diagram
```mermaid
graph TD
A[Detect] --> B{Triage}
B -->|SEV-1/2| C[Open bridge]
B -->|SEV-3/4| D[Assign owner]
C --> E[Investigate]
D --> E
E --> F{Mitigation?}
F -->|Yes| G[Apply fix]
F -->|No| H[Escalate]
G --> I[Monitor]
I --> J{Stable?}
J -->|Yes| K[Close]
J -->|No| E
H --> E
K --> L[Post-mortem]
```
## Initial diagnosis
Before going deeper, run the following steps in order:
1. Check the overall service health dashboard.
2. Review the last hour of production changes (`deploy log`).
3. Check active alerts in the monitoring system.
4. Confirm the failure is not user-side (DNS, local network).
### Triage checklist
- [ ] Affected service identified
- [ ] Severity assigned
- [ ] IC identified
- [ ] Bridge open
- [ ] Communication channel created
- [ ] Status page updated
- [ ] Comms lead assigned
## Common incident patterns
### Pattern A: latency spike
**Symptoms**: p99 latency rises from 200 ms to > 2 s without proportional traffic increase.
**Typical causes**:
- DB connection pool saturation.
- Massive cache miss (accidental invalidation).
- Long JVM garbage collection.
**Immediate actions**:
1. Check DB metrics (connections, locks, slow queries).
2. Validate cache hit rate.
3. If no cause is identified in 5 min, escalate to the service SME.
### Pattern B: cascading 5xx errors
**Symptoms**: sudden increase of HTTP 500/502/503 on one or more endpoints.
**Typical causes**:
- Upstream service down.
- Invalid configuration deployed.
- External resource (third-party API) unavailable.
**Immediate actions**:
1. Identify the failing upstream service.
2. Review the last deploy touching that path.
3. If the deploy is to blame, consider a rollback.
### Pattern C: data loss
**Symptoms**: customers report missing or inconsistent data.
**Typical causes**:
- Cleanup job that deleted more than intended.
- Schema migration executed with a bug.
- Bug in business logic.
**Immediate actions**:
1. **Stop** any job that could make things worse.
2. Evaluate whether a recent and viable backup can be restored.
3. Escalate immediately to the engineering lead.
## Useful commands
### Check connectivity
```bash
# DNS
dig +short example.com
# Basic HTTP
curl -sSI https://api.example.com/health
# TCP to a specific port
nc -zv db.internal 5432
```
### Inspect logs live
```bash
# Last 100 lines and follow
kubectl logs -n prod deploy/api --tail=100 -f
# Logs from the last 5 minutes
kubectl logs -n prod deploy/api --since=5m
# Logs of a specific pod
kubectl logs -n prod api-7d4f8b9c-x2k9n --tail=200
```
### Quick metrics
```bash
# CPU per pod
kubectl top pods -n prod
# Memory per pod
kubectl top pods -n prod --containers
# Disk usage of a node
ssh node-01 df -h
```
## Available webhooks
| Webhook | When to use it | Requires confirmation |
| --- | --- | --- |
| `vpn-diagnostic` | VPN access issues | Yes |
| `service-restart` | Hung or zombie service | Yes |
| `dns-flush` | Broken DNS resolution | Yes |
| `disk-cleanup` | Disk > 90% | Yes |
| `log-tail` | Need logs in real time | No |
| `cache-purge` | Stale or corrupt cache | Yes |
> Remember: webhook execution always requires explicit confirmation from the user who triggers it. The LLM can only recommend them; it must never execute them directly.
## Escalation
If the incident is not mitigated within the agreed SLA:
1. Notify the area's on-call manager.
2. If it exceeds 2 hours, notify the engineering director.
3. If customers are impacted, involve Customer Success.
4. If there is monetary or data loss, notify Legal and the C-level.
### Emergency contacts
```text
SRE on-call: +54 11 5555-0001
Platform lead: +54 11 5555-0002
Security IR: +54 11 5555-0003
CTO: +54 11 5555-0004
```
## Post-mortem
Within 5 business days after closing the incident:
1. Schedule a meeting with everyone involved.
2. Share the post-mortem document 24 h in advance.
3. During the meeting: review the timeline, identify the root cause.
4. Document an action plan with owners and dates.
5. Share learnings with the rest of the organization.
### Post-mortem template
```markdown
# Post-mortem: <title>
## Summary
<2-3 sentences about what happened and what the impact was>
## Timeline
- HH:MM - <event>
- HH:MM - <event>
## Root cause
<technical description of the cause>
## What went well
- <item>
- <item>
## What went wrong
- <item>
- <item>
## Corrective actions
- [ ] <action> - owner: <person> - due: <date>
- [ ] <action> - owner: <person> - due: <date>
## Lessons learned
<actionable insights for the team and the organization>
```
## Appendix: glossary
- **IC**: Incident Commander.
- **SME**: Subject Matter Expert.
- **SLA**: Service Level Agreement.
- **p99**: 99th percentile of latency.
- **Blameless**: culture where the post-mortem looks for systemic causes, not blame.
- **Rollback**: reverting a change to the previous version.
- **Mitigation**: action to reduce impact, not necessarily the root cause.
- **Resolution**: confirmation that the system is stable.
---
> If you find outdated or missing information in this runbook, edit the file and notify the SRE team. The source of truth is always the repository, not PDFs attached in Confluence.

View File

@@ -0,0 +1,36 @@
---
title: Incident Response Framework
tags: [incident, response, framework, sev, runbook]
owner: sre
updated: 2026-06-20
---
# Incident Response Framework
## Severities
- **SEV1**: total outage. Page on-call. Mitigate first, post-mortem after.
- **SEV2**: significant degradation. Ticket + stakeholder communication.
- **SEV3**: minor impact. Normal ticket.
## Steps
1. **Detect**: automatic alert or report.
2. **Triage**: identify scope and severity.
3. **Mitigate**: apply runbook or workaround before the root-cause fix.
4. **Communicate**: status page and stakeholders every 30 min for SEV1.
5. **Resolve**: apply the root-cause fix.
6. **Post-mortem**: blameless, within 5 business days.
## Roles
- Incident Commander
- Communications Lead
- Subject Matter Expert
## Related webhooks
- service-restart
- dns-flush
- disk-cleanup
- log-tail

View File

@@ -0,0 +1,32 @@
---
title: Service Restart Runbook
tags: [service, restart, systemd, operations]
owner: sre
updated: 2026-06-15
---
# Service Restart Runbook
## When to use it
- The service is down or not responding to health checks.
- Sustained performance drop that cannot be explained by load.
- After a deploy that left the service in an inconsistent state.
## Diagnosis
1. Confirm the current state: `systemctl status <service>` or equivalent.
2. Review the last 200 lines of the log.
3. Check dependencies (DB, Redis, network).
4. If there is no clear cause, restart the service via the `service-restart` webhook.
## Equivalent command
```bash
systemctl restart <service>
```
## Related webhooks
- service-restart
- log-tail

22
knowledge/runbooks/vpn.md Normal file
View File

@@ -0,0 +1,22 @@
---
title: VPN Runbook
tags: [vpn, network, access]
owner: sre
updated: 2026-06-01
---
# VPN Runbook
## Symptoms
Users cannot connect to the VPN or lose access intermittently.
## Diagnosis
- Check the VPN service status.
- Review gateway logs.
- Confirm user-side connectivity.
## Related webhooks
- vpn-diagnostic

View File

@@ -0,0 +1,23 @@
---
title: Log Reading SOP
tags: [logs, sops, troubleshooting, observability]
owner: sre
updated: 2026-06-05
---
# Log Reading SOP
## Goal
Retrieve the last N lines of a service log in under 30 seconds.
## Procedure
1. Identify the service and the log path.
2. Call the `log-tail` webhook with `service` and `lines`.
3. Look for error patterns (ERROR, CRITICAL, stack traces).
4. If there is a matching runbook, follow it.
## Related webhooks
- log-tail

21
package.json Normal file
View File

@@ -0,0 +1,21 @@
{
"name": "pi-chat-harness",
"private": true,
"version": "0.1.0",
"type": "module",
"packageManager": "pnpm@9.15.0",
"scripts": {
"dev": "pnpm -r --parallel dev",
"lint": "pnpm -r lint",
"typecheck": "pnpm -r typecheck",
"test": "vitest run",
"test:watch": "vitest",
"smoke": "node scripts/smoke.mjs",
"smoke:mock": "node scripts/smoke.mjs --mock-llm",
"mock:llm": "node scripts/mock-llm.mjs"
},
"devDependencies": {
"typescript": "^5.8.3",
"vitest": "^4.1.9"
}
}

View File

@@ -0,0 +1,19 @@
{
"name": "@pi-chat/pi-adapter",
"private": true,
"version": "0.1.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"typecheck": "tsc --noEmit",
"lint": "tsc --noEmit"
},
"dependencies": {
"@pi-chat/shared": "workspace:*"
},
"devDependencies": {
"typescript": "^5.8.3"
}
}

View File

@@ -0,0 +1,354 @@
import type { ChatResult, InternalDocReference, RecommendedAction } from "@pi-chat/shared";
/** Everything a {@link PiAdapter} needs to answer one chat turn. */
export type PiChatInput = {
// The new user message; the OpenAI-compatible adapter sends it as the final `user` message.
message: string;
// Requested model id; the OpenAI-compatible adapter falls back to its `defaultModel` when this is empty.
model: string;
// Candidate documents. Serialized to the model as `internal_docs` and used
// to validate the doc ids the model returns.
docs: InternalDocReference[];
// Actions the model may recommend. Serialized to the model as
// `available_actions`; recommendations with unknown ids are dropped.
availableActions: RecommendedAction[];
// Prior turns, replayed before the new user message. The OpenAI-compatible
// adapter sends `tool` entries with role `assistant`.
history?: Array<{
role: "user" | "assistant" | "system" | "tool";
content: string;
}>;
// Skill prompts; each one is sent as its own system message right after the
// base identity prompt.
skillPrompts?: string[];
// Optional per-session system prompt override. The OpenAI-compatible adapter
// sends it (trimmed, skipped when blank) as a separate system message AFTER
// the base identity prompt and the skill prompts, and before the
// docs/actions context message. It extends the persona; the strict rules in
// the base prompt are still sent first.
systemPrompt?: string | null;
};
/** Minimal chat backend contract: one request in, one structured result out. */
export interface PiAdapter {
// Resolves to a PiChatResult; the OpenAI-compatible implementation in this
// file rejects (throws) on non-OK HTTP responses instead of returning ok:false.
chat(input: PiChatInput): Promise<PiChatResult>;
}
/**
 * Structured reasons why the model reply could not be turned into a ChatResult.
 * `rawContent` carries at most the first 500 characters of the reply.
 */
export type PiChatError =
// No `{…}` span was found, or JSON.parse failed and no answer could be salvaged.
| { kind: "json_parse"; reason: string; rawContent: string }
// The reply parsed as JSON but the parsed value is not an object.
| { kind: "schema"; reason: string; rawContent: string }
// The model returned an empty or whitespace-only message.
| { kind: "no_content"; message: string };
/** Token accounting for one chat call, in camelCase. */
export type PiChatUsage = {
// Copied from the provider's `usage.prompt_tokens` when it is a number.
promptTokens?: number;
// Copied from the provider's `usage.completion_tokens` when it is a number.
completionTokens?: number;
// Copied from the provider's `usage.total_tokens` when it is a number.
totalTokens?: number;
// Copied from `usage.prompt_tokens_details.cached_tokens` when it is a number.
cachedTokens?: number;
// NOTE(review): not populated anywhere in this file; presumably filled in by
// the caller that times the request — confirm.
durationMs?: number;
};
/**
 * Outcome of a chat call, discriminated on `ok`.
 * On failure, `fallback` is a usable ChatResult so callers can still show
 * something to the user alongside the structured `error`.
 */
export type PiChatResult =
| { ok: true; result: ChatResult; usage?: PiChatUsage }
| { ok: false; error: PiChatError; fallback: ChatResult; usage?: PiChatUsage };
/** Connection settings for {@link createOpenAICompatiblePiAdapter}. */
export type OpenAICompatiblePiAdapterOptions = {
// API root; `/chat/completions` is appended to it.
baseUrl: string;
// Sent as `Authorization: Bearer <apiKey>`.
apiKey: string;
// Used when the chat input does not name a model.
defaultModel: string;
// Abort the request after this many milliseconds (default 30000).
timeoutMs?: number;
// Value sent as `max_tokens` (default 4096).
maxTokens?: number;
};
/**
 * The subset of an OpenAI-style chat completion response that this adapter
 * reads. Every field is optional because the payload comes from an external
 * server and is not validated before use.
 */
type OpenAIChatResponse = {
// Only the first choice's message content is read.
choices?: Array<{
message?: {
content?: string;
};
}>;
// Provider token accounting; converted to PiChatUsage by extractUsage.
usage?: {
prompt_tokens?: number;
completion_tokens?: number;
total_tokens?: number;
prompt_tokens_details?: { cached_tokens?: number };
};
};
/**
 * Clamp a model-supplied score into the closed range [0, 1].
 *
 * Anything that is not a real number (wrong type or NaN) yields `fallback`
 * unchanged; the fallback itself is not clamped.
 */
const score = (value: unknown, fallback: number) => {
  if (typeof value === "number" && !Number.isNaN(value)) {
    if (value <= 0) return 0;
    if (value >= 1) return 1;
    return value;
  }
  return fallback;
};
// Pull the user-facing answer out of a parsed model reply, whatever key the
// model chose for it. Well-known key names win (first non-blank one, returned
// as-is); otherwise the longest non-blank string value is returned trimmed.
// Yields null when `parsed` is not an object or holds no usable string.
const extractAnswer = (parsed: unknown): string | null => {
  if (parsed === null || typeof parsed !== "object") return null;
  const fields = parsed as Record<string, unknown>;
  const isFilled = (candidate: unknown): candidate is string =>
    typeof candidate === "string" && candidate.trim().length > 0;

  const named = ["answer", "response", "output", "text", "content", "message", "result"]
    .map((name) => fields[name])
    .find(isFilled);
  if (named !== undefined) return named;

  let best: string | null = null;
  for (const raw of Object.values(fields)) {
    if (!isFilled(raw)) continue;
    // A string that equals the value stored under the array-typed keys is
    // never treated as the answer (those keys are handled separately).
    if (raw === fields.recommended_actions || raw === fields.internal_docs) continue;
    const text = raw.trim();
    // Strictly longer only: on a tie the earliest value is kept.
    if (best === null || text.length > best.length) best = text;
  }
  return best;
};
/**
 * Coerce a parsed model reply into a well-formed ChatResult.
 *
 * - `answer`: whatever `extractAnswer` finds, else `fallbackAnswer`.
 * - `recommended_actions`: only entries whose id exists in `availableActions`
 *   survive. Entries may be objects or bare id strings ("loose mode").
 *   Confidence is clamped to 0..1. A missing/non-array field yields `[]`.
 * - `internal_docs`: only entries whose id exists in `docs` survive (objects
 *   or bare id strings). A missing/non-array field yields `docs` unchanged.
 *
 * The configured action is the source of truth for `requires_confirmation`:
 * model output can add a confirmation requirement but can never remove one,
 * and the resulting value is always a real boolean.
 *
 * @param parsed           Value produced by JSON.parse on the model reply.
 * @param fallbackAnswer   Answer text used when no string answer is found.
 * @param docs             Known documents, keyed by `id` for validation.
 * @param availableActions Known actions, keyed by `id` for validation.
 */
const normalizeResult = (
  parsed: unknown,
  fallbackAnswer: string,
  docs: InternalDocReference[],
  availableActions: RecommendedAction[],
): ChatResult => {
  const docsById = new Map(docs.map((doc) => [doc.id, doc]));
  const actionsById = new Map(availableActions.map((action) => [action.id, action]));
  const obj = (parsed && typeof parsed === "object" ? parsed : {}) as Record<string, unknown>;

  // Returns a 0- or 1-element array so it can be used directly with flatMap.
  const toAction = (action: unknown): RecommendedAction[] => {
    // Loose mode: accept either an object with id, or a bare string id.
    if (typeof action === "string") {
      const configured = actionsById.get(action);
      if (!configured) return [];
      return [{
        type: configured.type ?? ("webhook" as const),
        id: action,
        confidence: 0,
        reason: configured.reason ?? action,
        requires_confirmation: configured.requires_confirmation ?? true,
      }];
    }
    if (!action || typeof action !== "object") return [];
    const candidate = action as Partial<RecommendedAction>;
    if (!candidate.id) return [];
    const configured = actionsById.get(candidate.id);
    if (!configured) return [];
    // Untrusted model output must not be able to switch confirmation off for
    // an action the catalog marks as requiring it; it may only escalate.
    const configuredConfirmation = configured.requires_confirmation ?? true;
    return [{
      type: "webhook" as const,
      id: candidate.id,
      confidence: score(candidate.confidence, 0),
      reason: String(candidate.reason ?? configured.reason ?? "Suggested action"),
      requires_confirmation: configuredConfirmation || Boolean(candidate.requires_confirmation),
    }];
  };

  // Returns a 0- or 1-element array so it can be used directly with flatMap.
  const toDoc = (doc: unknown): InternalDocReference[] => {
    // Loose mode: accept either an object with id, or a bare string id.
    if (typeof doc === "string") {
      const indexed = docsById.get(doc);
      if (!indexed) return [];
      return [{ id: doc, title: indexed.title, source: indexed.source, relevance: 0 }];
    }
    if (!doc || typeof doc !== "object") return [];
    const candidate = doc as Partial<InternalDocReference>;
    if (!candidate.id) return [];
    const indexed = docsById.get(candidate.id);
    if (!indexed) return [];
    return [{
      id: candidate.id,
      title: String(candidate.title ?? indexed.title),
      source: String(candidate.source ?? indexed.source),
      relevance: score(candidate.relevance, indexed.relevance),
    }];
  };

  return {
    answer: extractAnswer(parsed) ?? fallbackAnswer,
    recommended_actions: Array.isArray(obj.recommended_actions)
      ? (obj.recommended_actions as unknown[]).flatMap((action) => toAction(action))
      : [],
    internal_docs: Array.isArray(obj.internal_docs)
      ? (obj.internal_docs as unknown[]).flatMap((doc) => toDoc(doc))
      : docs,
  };
};
/**
 * Turn the raw text of a model reply into a ChatResult without ever throwing
 * on bad content.
 *
 * The text between the first `{` and the last `}` is parsed as JSON. When
 * parsing fails, the answer field is salvaged with `salvageAnswer` if
 * possible (actions are then empty and `docs` is passed through). Failures
 * are reported as a structured PiChatError whose `rawContent` is the first
 * 500 characters of the reply.
 *
 * @param value            Raw message content returned by the model.
 * @param docs             Known documents, forwarded to normalizeResult.
 * @param availableActions Known actions, forwarded to normalizeResult.
 */
const safeJsonParse = (
  value: string,
  docs: InternalDocReference[],
  availableActions: RecommendedAction[],
): { ok: true; result: ChatResult } | { ok: false; error: PiChatError } => {
  const fail = (error: PiChatError) => ({ ok: false as const, error });
  const preview = () => value.slice(0, 500);

  if (!value || value.trim().length === 0) {
    return fail({ kind: "no_content", message: "LLM returned an empty message." });
  }

  const open = value.indexOf("{");
  const close = value.lastIndexOf("}");
  const hasObjectSpan = open !== -1 && close !== -1 && close > open;
  if (!hasObjectSpan) {
    return fail({
      kind: "json_parse",
      reason: "no JSON object delimiters found in response",
      rawContent: preview(),
    });
  }

  const candidate = value.slice(open, close + 1);
  let decoded: unknown = null;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    // Local models (vLLM / ollama) frequently cut their output mid-array when
    // max_tokens collides with the real context budget, so truncated JSON is
    // an expected failure. Rescue the answer text before reporting an error.
    const rescued = salvageAnswer(candidate);
    if (rescued === null) {
      return fail({
        kind: "json_parse",
        reason: "JSON.parse failed on truncated or malformed output",
        rawContent: preview(),
      });
    }
    return {
      ok: true,
      result: { answer: rescued, recommended_actions: [], internal_docs: docs },
    };
  }

  if (typeof decoded !== "object" || decoded === null) {
    return fail({
      kind: "schema",
      reason: "parsed value is not an object",
      rawContent: preview(),
    });
  }

  // The full raw reply doubles as the answer when no string field is found.
  return { ok: true, result: normalizeResult(decoded, value, docs, availableActions) };
};
// Single-character JSON escapes (the character after the backslash) and the
// text each one stands for.
const JSON_SIMPLE_ESCAPES = new Map<string, string>([
  ['"', '"'],
  ["\\", "\\"],
  ["/", "/"],
  ["b", "\b"],
  ["f", "\f"],
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
]);

// Decode the body of a JSON string literal (the text between the quotes).
// JSON.parse is authoritative; when the body is not strictly valid JSON (raw
// control characters, unknown escapes) fall back to a lenient single pass that
// decodes the escapes it knows and leaves everything else untouched. Decoding
// in ONE pass matters: chained replace() calls mis-read an escaped backslash
// followed by "n" or "t" as a newline/tab escape.
const unescapeJsonStringBody = (body: string): string => {
  try {
    const decoded: unknown = JSON.parse(`"${body}"`);
    if (typeof decoded === "string") return decoded;
  } catch {
    // Not strictly valid JSON — use the lenient decoder below.
  }
  return body.replace(
    /\\(?:u([0-9a-fA-F]{4})|([\s\S]))/g,
    (whole: string, hex: string | undefined, ch: string | undefined) => {
      if (hex !== undefined) return String.fromCharCode(Number.parseInt(hex, 16));
      return (ch !== undefined ? JSON_SIMPLE_ESCAPES.get(ch) : undefined) ?? whole;
    },
  );
};

// Best-effort regex extraction of the answer field from truncated JSON.
// Tries the canonical "answer" key first, then the variants seen from local
// models, and returns the decoded string value of the first key that has a
// complete (closed) non-blank string value. Returns null when none matches.
const salvageAnswer = (slice: string): string | null => {
  const candidates = ['answer', 'response', 'output', 'text', 'content', 'message', 'result'];
  for (const key of candidates) {
    // Key with optional quotes, a colon, then a double-quoted string whose
    // body may contain backslash escapes.
    const re = new RegExp(`["']?${key}["']?\\s*:\\s*"((?:[^"\\\\]|\\\\.)*)"`);
    const body = slice.match(re)?.[1];
    if (body === undefined || body.trim().length === 0) continue;
    return unescapeJsonStringBody(body);
  }
  return null;
};
/**
 * Build a {@link PiAdapter} that talks to any OpenAI-compatible
 * `/chat/completions` endpoint.
 *
 * Messages are sent in this order: base SIC identity/rules prompt, one system
 * message per skill prompt, the optional per-session system prompt, a system
 * message with the JSON-encoded docs and available actions, the history
 * (`tool` turns are sent as `assistant`), and finally the user message.
 *
 * The timeout (`options.timeoutMs`, default 30 s) covers the whole exchange:
 * connecting, waiting for headers AND reading the response body.
 *
 * @returns `ok: true` with the normalized result, or `ok: false` with a
 *   structured error plus a fallback whose answer is the raw model text.
 * @throws Error `llm_request_failed:<status>` on a non-OK HTTP status; errors
 *   from `fetch`/`response.json()` (network failure, abort on timeout, body
 *   that is not JSON) propagate unchanged.
 */
export const createOpenAICompatiblePiAdapter = (options: OpenAICompatiblePiAdapterOptions): PiAdapter => ({
  async chat(input) {
    const abortController = new AbortController();
    const timeout = setTimeout(() => abortController.abort(), options.timeoutMs ?? 30_000);
    let data: OpenAIChatResponse | null;
    try {
      const response = await fetch(`${options.baseUrl.replace(/\/+$/, "")}/chat/completions`, {
        method: "POST",
        headers: {
          authorization: `Bearer ${options.apiKey}`,
          "content-type": "application/json",
        },
        signal: abortController.signal,
        body: JSON.stringify({
          model: input.model || options.defaultModel,
          // Always pin max_tokens to avoid hitting the cap mid-JSON and
          // emitting truncated output. Real providers honor this; servers
          // that cap harder will return a finish_reason of "length" but
          // we'll still get usable text.
          max_tokens: options.maxTokens ?? 4096,
          // Request a single JSON response. Some OpenAI-compatible servers
          // (e.g. local proxies like mr-auto) default to SSE streaming, and
          // `response.json()` below would blow up on the chunked stream.
          // Real providers like MiniMax accept and ignore this flag.
          stream: false,
          // Force JSON output for all OpenAI-compatible backends. Local
          // foundation models (qwen-local behind llm.rikrdo.com) often
          // ignore the "ALWAYS respond with JSON" system instruction and
          // default to natural language — this flag tells the server to
          // constrain the output to a JSON object. Real providers accept
          // and honor it.
          response_format: { type: "json_object" },
          messages: [
            {
              role: "system",
              content:
                "You are SIC (Super Incident Commander), an internal incident management assistant. " +
                "STRICT RULES: " +
                "1) Your reply MUST be a single JSON object (no markdown, no prose wrapper) with EXACTLY three keys: " +
                " - \"answer\" (string): the response to the user, may include markdown for tables/lists/code. " +
                " - \"recommended_actions\" (array): each item MUST be an object with keys id (string, present in available_actions), confidence (number 0..1), reason (string), and \"type\": \"webhook\". " +
                " - \"internal_docs\" (array): each item MUST be an object with keys id, title, source, relevance (number 0..1). " +
                " Example shape: {\"answer\": \"...\", \"recommended_actions\": [{\"id\":\"vpn-diagnostic\",\"type\":\"webhook\",\"confidence\":0.8,\"reason\":\"matches VPN symptoms\"}], \"internal_docs\": [{\"id\":\"runbooks:vpn\",\"title\":\"VPN Runbook\",\"source\":\"runbooks/vpn.md\",\"relevance\":0.9}]}. " +
                "2) recommended_actions may only include ids present in available_actions; never execute actions and never invent ids. The backend executes with confirmation. " +
                "3) DO NOT invent company names, owners, integrations, customers, or external facts. If the user asks something not backed by internal_docs, available_actions, or the history, say explicitly that you do not have that information. " +
                "4) When asked who you are or what company you belong to, only state that you are SIC (Super Incident Commander), an internal assistant; do not assume an owning company. " +
                "5) Whenever an available_action is contextually relevant to the user's request OR the user asks which actions exist, include its id in recommended_actions so the user can see and execute it from the right panel. The right panel renders ONLY items present in recommended_actions, so omitting them hides them.",
            },
            ...(input.skillPrompts ?? []).map((prompt) => ({
              role: "system" as const,
              content: prompt,
            })),
            ...(input.systemPrompt && input.systemPrompt.trim().length > 0
              ? [{ role: "system" as const, content: input.systemPrompt.trim() }]
              : []),
            {
              role: "system",
              content: JSON.stringify({
                internal_docs: input.docs,
                available_actions: input.availableActions,
              }),
            },
            ...(input.history ?? []).map((message) => ({
              role: message.role === "tool" ? "assistant" : message.role,
              content: message.content,
            })),
            { role: "user", content: input.message },
          ],
          temperature: 0.2,
        }),
      });
      if (!response.ok) {
        throw new Error(`llm_request_failed:${response.status}`);
      }
      // Read the body while the abort timer is still armed: a server that
      // sends headers and then stalls must not hang this call forever.
      data = (await response.json()) as OpenAIChatResponse | null;
    } finally {
      clearTimeout(timeout);
    }
    // The payload is unvalidated external data: treat anything that is not a
    // string (null, array-of-parts, missing) as an empty message.
    const rawContent: unknown = data?.choices?.[0]?.message?.content;
    const content = typeof rawContent === "string" ? rawContent : "";
    const parsed = safeJsonParse(content, input.docs, input.availableActions);
    const usage = extractUsage(data?.usage);
    if (parsed.ok) {
      return { ok: true, result: parsed.result, usage };
    }
    return {
      ok: false,
      error: parsed.error,
      fallback: {
        answer: content,
        recommended_actions: [],
        internal_docs: input.docs,
      },
      usage,
    };
  },
});
/**
 * Convert the provider's snake_case `usage` object into PiChatUsage.
 *
 * Only numeric fields are copied; anything else becomes `undefined`.
 * Returns `undefined` when `raw` is missing or none of the four counters is
 * a number.
 */
const extractUsage = (raw: OpenAIChatResponse["usage"]): PiChatUsage | undefined => {
  if (!raw || typeof raw !== "object") return undefined;

  const asCount = (field: unknown): number | undefined =>
    typeof field === "number" ? field : undefined;

  const usage = {
    promptTokens: asCount(raw.prompt_tokens),
    completionTokens: asCount(raw.completion_tokens),
    totalTokens: asCount(raw.total_tokens),
    cachedTokens: asCount(raw.prompt_tokens_details?.cached_tokens),
  };

  const hasAnyCounter = Object.values(usage).some((count) => count !== undefined);
  return hasAnyCounter ? usage : undefined;
};

View File

@@ -0,0 +1,181 @@
import { describe, expect, it } from "vitest";
import {
createOpenAICompatiblePiAdapter,
} from "../src/index.js";
// End-to-end checks of the OpenAI-compatible adapter against a local mock
// HTTP server (startMockLLM, defined at the bottom of this file). Each test
// starts its own server on an ephemeral port and stops it in `finally`.
describe("pi-adapter structured errors", () => {
// Empty model message -> ok:false / no_content, with an empty fallback answer.
it("returns ok:false with no_content when LLM returns empty", async () => {
const server = await startMockLLM({ responseContent: "" });
try {
const pi = createOpenAICompatiblePiAdapter({
baseUrl: server.baseUrl,
apiKey: "test",
defaultModel: "fast",
});
const result = await pi.chat({
message: "hi",
model: "fast",
docs: [],
availableActions: [],
});
expect(result.ok).toBe(false);
if (!result.ok) {
expect(result.error.kind).toBe("no_content");
expect(result.fallback.answer).toBe("");
expect(result.fallback.recommended_actions).toEqual([]);
}
} finally {
await server.stop();
}
});
// Plain prose (no `{…}` span) -> ok:false / json_parse; the raw text is kept
// as the fallback answer.
it("returns ok:false with json_parse when response has no JSON object", async () => {
const server = await startMockLLM({ responseContent: "Just plain text answer" });
try {
const pi = createOpenAICompatiblePiAdapter({
baseUrl: server.baseUrl,
apiKey: "test",
defaultModel: "fast",
});
const result = await pi.chat({
message: "hi",
model: "fast",
docs: [],
availableActions: [],
});
expect(result.ok).toBe(false);
if (!result.ok) {
expect(result.error.kind).toBe("json_parse");
expect(result.fallback.answer).toBe("Just plain text answer");
}
} finally {
await server.stop();
}
});
// Happy path: a well-formed JSON object is returned as ok:true.
it("returns ok:true when response is well-formed JSON", async () => {
const server = await startMockLLM({
responseContent: JSON.stringify({
answer: "All good",
recommended_actions: [],
internal_docs: [],
}),
});
try {
const pi = createOpenAICompatiblePiAdapter({
baseUrl: server.baseUrl,
apiKey: "test",
defaultModel: "fast",
});
const result = await pi.chat({
message: "hi",
model: "fast",
docs: [],
availableActions: [],
});
expect(result.ok).toBe(true);
if (result.ok) {
expect(result.result.answer).toBe("All good");
}
} finally {
await server.stop();
}
});
// HTTP-level failures reject the promise instead of producing ok:false.
it("throws on non-OK HTTP response (transport error, not parse error)", async () => {
const server = await startMockLLM({ status: 500, responseContent: "" });
try {
const pi = createOpenAICompatiblePiAdapter({
baseUrl: server.baseUrl,
apiKey: "test",
defaultModel: "fast",
});
await expect(
pi.chat({
message: "hi",
model: "fast",
docs: [],
availableActions: [],
}),
).rejects.toThrow(/llm_request_failed:500/);
} finally {
await server.stop();
}
});
// Unknown action ids are dropped and an out-of-range confidence (2.5) is
// clamped to 1.
it("filters recommended_actions to known ids and clamps scores", async () => {
const server = await startMockLLM({
responseContent: JSON.stringify({
answer: "ok",
recommended_actions: [
{ type: "webhook", id: "dns-flush", confidence: 2.5, reason: "x" },
{ type: "webhook", id: "unknown-id", confidence: 0.9, reason: "y" },
],
internal_docs: [],
}),
});
try {
const pi = createOpenAICompatiblePiAdapter({
baseUrl: server.baseUrl,
apiKey: "test",
defaultModel: "fast",
});
const result = await pi.chat({
message: "hi",
model: "fast",
docs: [],
availableActions: [
{ type: "webhook", id: "dns-flush", confidence: 0, reason: "r", requires_confirmation: true },
],
});
expect(result.ok).toBe(true);
if (result.ok) {
expect(result.result.recommended_actions).toHaveLength(1);
expect(result.result.recommended_actions[0]?.id).toBe("dns-flush");
expect(result.result.recommended_actions[0]?.confidence).toBe(1);
}
} finally {
await server.stop();
}
});
});
import { createServer, type Server } from "node:http";
/**
 * Start a throwaway HTTP server on 127.0.0.1 (ephemeral port) that answers
 * every request with a fixed OpenAI-style chat completion.
 *
 * @param opts.responseContent Text placed in `choices[0].message.content`.
 * @param opts.status          HTTP status to reply with (default 200).
 * @returns `baseUrl` (ending in `/v1`) to hand to the adapter, and `stop()`
 *   which resolves once the server has closed.
 *
 * The returned promise rejects if the server cannot start listening, so a
 * bind failure fails the test instead of surfacing as an unhandled `error`
 * event.
 */
async function startMockLLM(opts: { responseContent: string; status?: number }): Promise<{
  baseUrl: string;
  stop: () => Promise<void>;
}> {
  const server: Server = createServer((_req, res) => {
    res.writeHead(opts.status ?? 200, { "content-type": "application/json" });
    res.end(
      JSON.stringify({
        id: "mock",
        object: "chat.completion",
        created: 0,
        model: "fast",
        choices: [
          {
            index: 0,
            message: { role: "assistant", content: opts.responseContent },
            finish_reason: "stop",
          },
        ],
      }),
    );
  });
  return await new Promise((resolve, reject) => {
    server.once("error", reject);
    server.listen(0, "127.0.0.1", () => {
      // Startup succeeded; the startup-only error handler is no longer needed.
      server.off("error", reject);
      const address = server.address();
      const port = typeof address === "object" && address ? address.port : 0;
      resolve({
        baseUrl: `http://127.0.0.1:${port}/v1`,
        stop: () =>
          new Promise<void>((done) => {
            server.close(() => done());
          }),
      });
    });
  });
}

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"]
}

View File

@@ -0,0 +1,16 @@
{
"name": "@pi-chat/shared",
"private": true,
"version": "0.1.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"typecheck": "tsc --noEmit",
"lint": "tsc --noEmit"
},
"devDependencies": {
"typescript": "^5.8.3"
}
}

View File

@@ -0,0 +1,33 @@
/** Identity of an authenticated caller. Only `id` and `roles` are mandatory. */
export type AuthUser = {
id: string;
username?: string;
email?: string;
// Role names granted to the user; the set of valid roles is defined outside this file.
roles: string[];
};
/** An action offered to the user; `"webhook"` is the only action type. */
export type RecommendedAction = {
type: "webhook";
// Identifier of the action.
id: string;
// Confidence score for the recommendation.
confidence: number;
// Human-readable justification for the recommendation.
reason: string;
// Whether the user must confirm before the action is run.
requires_confirmation: boolean;
/**
 * Optional soft signal derived from the user's audit history: e.g.
 * "3 runs in last 7d, 100% success". The LLM may use this as a tiebreaker;
 * the UI uses it to show a "Most used" tag.
 */
usageHint?: string;
};
/** Pointer to an internal document that is relevant to a chat answer. */
export type InternalDocReference = {
// Identifier of the document.
id: string;
// Display title of the document.
title: string;
// Where the document comes from (presumably a path under the knowledge base — confirm).
source: string;
// Relevance score for the current question.
relevance: number;
};
/** Structured reply for one chat turn: the answer text plus suggested actions and supporting docs. */
export type ChatResult = {
// Text shown to the user.
answer: string;
// Actions suggested for this turn; may be empty.
recommended_actions: RecommendedAction[];
// Documents referenced for this turn; may be empty.
internal_docs: InternalDocReference[];
};

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"]
}

3246
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

3
pnpm-workspace.yaml Normal file
View File

@@ -0,0 +1,3 @@
packages:
- "apps/*"
- "packages/*"

12
tsconfig.base.json Normal file
View File

@@ -0,0 +1,12 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "Bundler",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true
}
}

10
vitest.config.ts Normal file
View File

@@ -0,0 +1,10 @@
import { defineConfig } from "vitest/config";
// Root Vitest configuration shared by the whole workspace.
export default defineConfig({
test: {
// Only the API and pi-adapter test suites are collected.
include: ["apps/api/test/**/*.test.ts", "packages/pi-adapter/test/**/*.test.ts"],
// Tests exercise server-side code, so no DOM environment is needed.
environment: "node",
// Per-test timeout in milliseconds.
testTimeout: 20_000,
// Run test files in forked child processes (Vitest's `forks` pool).
pool: "forks",
},
});