Initial commit: SIC harness (backend, web, pi-adapter, configs, docs)

- pnpm monorepo: apps/api (Fastify + SQLite + SSE), apps/web (React+Vite), packages/shared, packages/pi-adapter
- Local auth (admin/webhook-runner roles) + Keycloak JWT ready
- Multi-session chat with reliable history (user persisted before LLM, assistant persisted after stream)
- Markdown knowledge base with /api/docs/search + /api/docs/:id
- YAML webhook catalog with backend-only execution, retry/backoff, audit (webhook_runs), and per-user rate limit
- Skills config (sre-on-call, blameless-postmortem, security-incident) injected into LLM system prompt
- LLM provider failover chain (config/models.yml fallback + LLM_FALLBACK_CHAIN override)
- Context-aware webhooks panel + backend id-mention safety net
- Per-message stats (time/duration/tokens/model), Markdown+GFM render, code & table copy/download buttons
- Vitest suite, end-to-end smoke test (scripts/smoke.mjs), per-session system prompt override
- /metrics Prometheus endpoint + /api/metrics JSON, request-id correlation
- dotenv with explicit repo-root path; envString/envNumber helpers (handles empty-string env)
- Runbooks + SOPs under knowledge/ in English; README, docs, and INDEX.md in English
2026-06-29 16:20:53 +02:00
commit 62728b2200
89 changed files with 11992 additions and 0 deletions
--- a/packages/pi-adapter/package.json
+++ b/packages/pi-adapter/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "@pi-chat/pi-adapter",
+  "private": true,
+  "version": "0.1.0",
+  "type": "module",
+  "exports": {
+    ".": "./src/index.ts"
+  },
+  "scripts": {
+    "typecheck": "tsc --noEmit",
+    "lint": "tsc --noEmit"
+  },
+  "dependencies": {
+    "@pi-chat/shared": "workspace:*"
+  },
+  "devDependencies": {
+    "typescript": "^5.8.3"
+  }
+}
--- a/packages/pi-adapter/src/index.ts
+++ b/packages/pi-adapter/src/index.ts
@@ -0,0 +1,354 @@
+import type { ChatResult, InternalDocReference, RecommendedAction } from "@pi-chat/shared";
+
+export type PiChatInput = { // Everything one chat() call needs to build its LLM request.
+  message: string; // Current user turn; sent as the final "user" message.
+  model: string; // Requested model; an empty string falls back to the adapter's defaultModel.
+  docs: InternalDocReference[]; // Sent to the model as internal_docs; also the allow-list for returned doc ids.
+  availableActions: RecommendedAction[]; // Sent as available_actions; also the allow-list for returned action ids.
+  history?: Array<{ // Prior turns, replayed between the docs/actions context message and the current user message.
+    role: "user" | "assistant" | "system" | "tool"; // "tool" is sent to the LLM as "assistant".
+    content: string;
+  }>;
+  skillPrompts?: string[]; // Each entry becomes its own system message right after the base prompt.
+  // Optional per-session system prompt. The adapter sends it (trimmed) as its
+  // own system message after the base identity prompt and the skill prompts,
+  // and before the docs/actions context message. Null or blank is skipped.
+  systemPrompt?: string | null;
+};
+
+export interface PiAdapter { // Contract implemented by createOpenAICompatiblePiAdapter in this module.
+  chat(input: PiChatInput): Promise<PiChatResult>; // One request/response round trip; the result is a tagged ok/error union.
+}
+
+export type PiChatError = // Why the model's reply could not be turned into a ChatResult.
+  | { kind: "json_parse"; reason: string; rawContent: string } // no {...} span, or JSON.parse failed and nothing was salvaged; rawContent = first 500 chars
+  | { kind: "schema"; reason: string; rawContent: string } // the parsed JSON value was not an object
+  | { kind: "no_content"; message: string }; // the reply was empty or whitespace-only
+
+export type PiChatUsage = { // Token accounting in camelCase; a field is absent when the server did not report a number.
+  promptTokens?: number; // from usage.prompt_tokens
+  completionTokens?: number; // from usage.completion_tokens
+  totalTokens?: number; // from usage.total_tokens
+  cachedTokens?: number; // from usage.prompt_tokens_details.cached_tokens
+  durationMs?: number; // never set by this module (extractUsage does not populate it)
+};
+
+export type PiChatResult = // Discriminated on `ok`; content/parse failures are returned, not thrown.
+  | { ok: true; result: ChatResult; usage?: PiChatUsage } // the reply parsed (or its answer was salvaged) into a ChatResult
+  | { ok: false; error: PiChatError; fallback: ChatResult; usage?: PiChatUsage }; // fallback carries the raw reply text as its answer
+
+export type OpenAICompatiblePiAdapterOptions = { // Connection settings for createOpenAICompatiblePiAdapter.
+  baseUrl: string; // "/chat/completions" is appended after stripping one trailing slash
+  apiKey: string; // sent as "authorization: Bearer <apiKey>"
+  defaultModel: string; // used when the per-call model is an empty string
+  timeoutMs?: number; // abort deadline for the request; defaults to 30_000
+  maxTokens?: number; // max_tokens sent to the server; defaults to 4096
+};
+
+type OpenAIChatResponse = { // The subset of the chat-completions response body that this module reads.
+  choices?: Array<{ // only choices[0] is consulted
+    message?: {
+      content?: string; // raw reply text handed to safeJsonParse; a missing value is treated as ""
+    };
+  }>;
+  usage?: { // server-side token counters; converted to PiChatUsage by extractUsage
+    prompt_tokens?: number;
+    completion_tokens?: number;
+    total_tokens?: number;
+    prompt_tokens_details?: { cached_tokens?: number };
+  };
+};
+
+// Clamp a model-supplied score into [0, 1]; non-numbers and NaN yield `fallback` unchanged.
+const score = (value: unknown, fallback: number) =>
+  typeof value === "number" && !Number.isNaN(value)
+    ? Math.min(1, Math.max(0, value)) : fallback;
+
+// Pull the user-facing answer out of a parsed JSON reply, whichever key the
+// model chose. Well-known keys win in priority order (value returned as-is);
+// otherwise the longest non-blank string value is returned trimmed, the first
+// one winning ties. Returns null for non-objects or when no string is usable.
+const extractAnswer = (parsed: unknown): string | null => {
+  if (!parsed || typeof parsed !== "object") return null;
+  const obj = parsed as Record<string, unknown>;
+  const preferredKeys = ["answer", "response", "output", "text", "content", "message", "result"];
+  for (const key of preferredKeys) {
+    const value = obj[key];
+    if (typeof value === "string" && value.trim().length > 0) return value;
+  }
+  // These keys belong to the structured arrays handled by normalizeResult.
+  // Skip them by key: comparing by value would also discard any unrelated
+  // field that happens to hold the same text.
+  const reservedKeys = new Set(["recommended_actions", "internal_docs"]);
+  let longest: string | null = null;
+  for (const [key, value] of Object.entries(obj)) {
+    if (reservedKeys.has(key) || typeof value !== "string") continue;
+    const trimmed = value.trim();
+    if (trimmed.length > 0 && (longest === null || trimmed.length > longest.length)) longest = trimmed;
+  }
+  return longest;
+};
+
+// Coerce the model's parsed reply into a ChatResult. Items may be objects or
+// bare string ids ("loose mode"); ids missing from `availableActions` /
+// `docs` are dropped and scores are clamped to [0, 1]. `fallbackAnswer` is
+// used when no answer-like string exists. A missing internal_docs array
+// falls back to `docs`; a missing recommended_actions array yields [].
+const normalizeResult = (
+  parsed: unknown,
+  fallbackAnswer: string,
+  docs: InternalDocReference[],
+  availableActions: RecommendedAction[],
+): ChatResult => {
+  const docsById = new Map(docs.map((doc) => [doc.id, doc]));
+  const actionsById = new Map(availableActions.map((action) => [action.id, action]));
+  const obj = (parsed && typeof parsed === "object" ? parsed : {}) as Record<string, unknown>;
+
+  const toAction = (raw: unknown): RecommendedAction[] => {
+    if (typeof raw === "string") {
+      const configured = actionsById.get(raw);
+      if (!configured) return [];
+      return [{
+        type: configured.type ?? ("webhook" as const),
+        id: raw,
+        confidence: 0,
+        reason: configured.reason ?? raw,
+        requires_confirmation: configured.requires_confirmation ?? true,
+      }];
+    }
+    if (!raw || typeof raw !== "object") return [];
+    const candidate = raw as Partial<RecommendedAction>;
+    const configured = candidate.id ? actionsById.get(candidate.id) : undefined;
+    if (!candidate.id || !configured) return [];
+    return [{
+      type: "webhook" as const,
+      id: candidate.id,
+      confidence: score(candidate.confidence, 0),
+      reason: String(candidate.reason ?? configured.reason ?? "Suggested action"),
+      // Model output is untrusted: it may request confirmation, but it must
+      // never switch off a confirmation the configured action requires.
+      requires_confirmation: candidate.requires_confirmation === true || (configured.requires_confirmation ?? true),
+    }];
+  };
+
+  const toDoc = (raw: unknown): InternalDocReference[] => {
+    if (typeof raw === "string") {
+      const indexed = docsById.get(raw);
+      return indexed ? [{ id: raw, title: indexed.title, source: indexed.source, relevance: 0 }] : [];
+    }
+    if (!raw || typeof raw !== "object") return [];
+    const candidate = raw as Partial<InternalDocReference>;
+    const indexed = candidate.id ? docsById.get(candidate.id) : undefined;
+    if (!candidate.id || !indexed) return [];
+    return [{
+      id: candidate.id,
+      title: String(candidate.title ?? indexed.title),
+      source: String(candidate.source ?? indexed.source),
+      relevance: score(candidate.relevance, indexed.relevance),
+    }];
+  };
+
+  return {
+    answer: extractAnswer(parsed) ?? fallbackAnswer,
+    recommended_actions: Array.isArray(obj.recommended_actions) ? obj.recommended_actions.flatMap(toAction) : [],
+    internal_docs: Array.isArray(obj.internal_docs) ? obj.internal_docs.flatMap(toDoc) : docs,
+  };
+};
+
+// Turn the raw model reply into a ChatResult without throwing.
+//   1. Blank reply                           -> no_content error.
+//   2. No "{" ... "}" span                   -> json_parse error.
+//   3. Span parses as JSON                   -> normalizeResult(...).
+//   4. Span does not parse, answer salvaged  -> ok, answer only + caller's docs.
+//   5. Span does not parse, nothing salvaged -> json_parse error.
+// Error payloads carry at most the first 500 characters of the reply.
+const safeJsonParse = (
+  value: string,
+  docs: InternalDocReference[],
+  availableActions: RecommendedAction[],
+): { ok: true; result: ChatResult } | { ok: false; error: PiChatError } => {
+  if (!value || value.trim().length === 0) {
+    const error: PiChatError = { kind: "no_content", message: "LLM returned an empty message." };
+    return { ok: false, error };
+  }
+
+  // Shared builder for the two error kinds that echo the raw reply.
+  const reject = (
+    kind: "json_parse" | "schema",
+    reason: string,
+  ): { ok: false; error: PiChatError } => ({
+    ok: false,
+    error: { kind, reason, rawContent: value.slice(0, 500) },
+  });
+
+  // Only the span from the first "{" to the last "}" is parsed, which
+  // tolerates text before and after the object.
+  const open = value.indexOf("{");
+  const close = value.lastIndexOf("}");
+  const hasObjectSpan = open !== -1 && close !== -1 && close > open;
+  if (!hasObjectSpan) {
+    return reject("json_parse", "no JSON object delimiters found in response");
+  }
+
+  const candidate = value.slice(open, close + 1);
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(candidate);
+  } catch {
+    // Truncated JSON is a real failure mode for local models (vLLM /
+    // ollama often cut output mid-array when max_tokens conflicts with
+    // their actual context budget), so try to recover just the answer
+    // string with a regex before reporting the failure.
+    const answer = salvageAnswer(candidate);
+    if (answer === null) {
+      return reject("json_parse", "JSON.parse failed on truncated or malformed output");
+    }
+    return {
+      ok: true,
+      result: { answer, recommended_actions: [], internal_docs: docs },
+    };
+  }
+
+  // Defensive: a span that starts with "{" decodes to an object or throws,
+  // but the check keeps normalizeResult's input explicit.
+  const isObject = decoded !== null && typeof decoded === "object";
+  if (!isObject) {
+    return reject("schema", "parsed value is not an object");
+  }
+
+  // The whole raw reply is the fallback answer when the object has no
+  // answer-like string field.
+  return {
+    ok: true,
+    result: normalizeResult(decoded, value, docs, availableActions),
+  };
+};
+
+// Best-effort regex extraction of the answer string from JSON that failed to parse.
+// Returns the first non-blank, fully quoted value among the candidate keys, else null.
+const salvageAnswer = (slice: string): string | null => {
+  const candidates = ['answer', 'response', 'output', 'text', 'content', 'message', 'result'];
+  // Decode escapes in ONE pass: chained .replace() calls misread an escaped
+  // backslash followed by "n"/"t" (a literal backslash + letter) as a newline/tab.
+  const simple: Record<string, string> = { n: "\n", t: "\t", r: "\r", b: "\b", f: "\f" };
+  const decodeEscapes = (raw: string) => raw.replace(/\\(?:u([0-9a-fA-F]{4})|([\s\S]))/g,
+    (_m: string, hex: string | undefined, ch: string = "") => (hex ? String.fromCharCode(parseInt(hex, 16)) : simple[ch] ?? ch));
+  for (const key of candidates) {
+    const m = slice.match(new RegExp(`["']?${key}["']?\\s*:\\s*"((?:[^"\\\\]|\\\\.)*)"`));
+    if (m && m[1] && m[1].trim().length > 0) return decodeEscapes(m[1]);
+  }
+  return null;
+};
+
+// Build a PiAdapter for any OpenAI-compatible /chat/completions endpoint.
+// chat() resolves for every 2xx reply (unparseable content becomes ok:false
+// with the raw text as fallback) and rejects on transport failures: network
+// error, timeout abort, non-2xx status ("llm_request_failed:<status>"), or
+// a response body that is not valid JSON.
+export const createOpenAICompatiblePiAdapter = (options: OpenAICompatiblePiAdapterOptions): PiAdapter => ({
+  async chat(input) {
+    const payload = JSON.stringify({
+      model: input.model || options.defaultModel,
+      // Always pin max_tokens so a server-side default does not cut the JSON
+      // off mid-object; servers that cap harder still return usable text.
+      max_tokens: options.maxTokens ?? 4096,
+      // Ask for one JSON body: some OpenAI-compatible servers default to SSE
+      // streaming, which response.json() below cannot read.
+      stream: false,
+      // Constrain output to a JSON object; local models often ignore the
+      // "reply with JSON" instruction in the system prompt.
+      response_format: { type: "json_object" },
+      messages: [
+        {
+          role: "system",
+          content:
+            "You are SIC (Super Incident Commander), an internal incident management assistant. " +
+            "STRICT RULES: " +
+            "1) Your reply MUST be a single JSON object (no markdown, no prose wrapper) with EXACTLY three keys: " +
+            "   - \"answer\" (string): the response to the user, may include markdown for tables/lists/code. " +
+            "   - \"recommended_actions\" (array): each item MUST be an object with keys id (string, present in available_actions), confidence (number 0..1), reason (string), and \"type\": \"webhook\". " +
+            "   - \"internal_docs\" (array): each item MUST be an object with keys id, title, source, relevance (number 0..1). " +
+            "   Example shape: {\"answer\": \"...\", \"recommended_actions\": [{\"id\":\"vpn-diagnostic\",\"type\":\"webhook\",\"confidence\":0.8,\"reason\":\"matches VPN symptoms\"}], \"internal_docs\": [{\"id\":\"runbooks:vpn\",\"title\":\"VPN Runbook\",\"source\":\"runbooks/vpn.md\",\"relevance\":0.9}]}. " +
+            "2) recommended_actions may only include ids present in available_actions; never execute actions and never invent ids. The backend executes with confirmation. " +
+            "3) DO NOT invent company names, owners, integrations, customers, or external facts. If the user asks something not backed by internal_docs, available_actions, or the history, say explicitly that you do not have that information. " +
+            "4) When asked who you are or what company you belong to, only state that you are SIC (Super Incident Commander), an internal assistant; do not assume an owning company. " +
+            "5) Whenever an available_action is contextually relevant to the user's request OR the user asks which actions exist, include its id in recommended_actions so the user can see and execute it from the right panel. The right panel renders ONLY items present in recommended_actions, so omitting them hides them.",
+        },
+        ...(input.skillPrompts ?? []).map((prompt) => ({
+          role: "system" as const,
+          content: prompt,
+        })),
+        ...(input.systemPrompt && input.systemPrompt.trim().length > 0
+          ? [{ role: "system" as const, content: input.systemPrompt.trim() }]
+          : []),
+        {
+          role: "system",
+          content: JSON.stringify({
+            internal_docs: input.docs,
+            available_actions: input.availableActions,
+          }),
+        },
+        ...(input.history ?? []).map((message) => ({
+          role: message.role === "tool" ? "assistant" : message.role,
+          content: message.content,
+        })),
+        { role: "user", content: input.message },
+      ],
+      temperature: 0.2,
+    });
+
+    const abortController = new AbortController();
+    const timeout = setTimeout(() => abortController.abort(), options.timeoutMs ?? 30_000);
+
+    // Keep the timer armed until the body is fully read: fetch() resolves as
+    // soon as the headers arrive, so clearing it there would let a stalled
+    // response body hang this call forever.
+    let data: OpenAIChatResponse;
+    try {
+      const response = await fetch(`${options.baseUrl.replace(/\/$/, "")}/chat/completions`, {
+        method: "POST",
+        headers: {
+          authorization: `Bearer ${options.apiKey}`,
+          "content-type": "application/json",
+        },
+        signal: abortController.signal,
+        body: payload,
+      });
+      if (!response.ok) {
+        throw new Error(`llm_request_failed:${response.status}`);
+      }
+      data = (await response.json()) as OpenAIChatResponse;
+    } finally {
+      clearTimeout(timeout);
+    }
+
+    const content = data.choices?.[0]?.message?.content ?? "";
+    const parsed = safeJsonParse(content, input.docs, input.availableActions);
+    const usage = extractUsage(data.usage);
+    if (parsed.ok) {
+      return { ok: true, result: parsed.result, usage };
+    }
+
+    return {
+      ok: false,
+      error: parsed.error,
+      fallback: { answer: content, recommended_actions: [], internal_docs: input.docs },
+      usage,
+    };
+  },
+});
+
+// Map the server's snake_case usage counters onto PiChatUsage. Any field
+// that is not a number is reported as undefined; if nothing numeric is
+// present at all, the whole usage object is undefined.
+const extractUsage = (raw: OpenAIChatResponse["usage"]): PiChatUsage | undefined => {
+  if (!raw || typeof raw !== "object") return undefined;
+  const num = (candidate: unknown) => (typeof candidate === "number" ? candidate : undefined);
+  const usage = {
+    promptTokens: num(raw.prompt_tokens),
+    completionTokens: num(raw.completion_tokens),
+    totalTokens: num(raw.total_tokens),
+    cachedTokens: num(raw.prompt_tokens_details?.cached_tokens),
+  };
+  return Object.values(usage).every((count) => count === undefined) ? undefined : usage;
+};
--- a/packages/pi-adapter/test/index.test.ts
+++ b/packages/pi-adapter/test/index.test.ts
@@ -0,0 +1,181 @@
+import { describe, expect, it } from "vitest";
+import {
+  createOpenAICompatiblePiAdapter,
+} from "../src/index.js";
+
+describe("pi-adapter structured errors", () => { // Each case runs the real adapter against a local mock HTTP server (startMockLLM below).
+  it("returns ok:false with no_content when LLM returns empty", async () => {
+    const server = await startMockLLM({ responseContent: "" }); // empty message content
+    try {
+      const pi = createOpenAICompatiblePiAdapter({
+        baseUrl: server.baseUrl,
+        apiKey: "test",
+        defaultModel: "fast",
+      });
+      const result = await pi.chat({
+        message: "hi",
+        model: "fast",
+        docs: [],
+        availableActions: [],
+      });
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.kind).toBe("no_content");
+        expect(result.fallback.answer).toBe(""); // the fallback echoes the raw (empty) reply
+        expect(result.fallback.recommended_actions).toEqual([]);
+      }
+    } finally {
+      await server.stop(); // always release the port, even when an assertion fails
+    }
+  });
+
+  it("returns ok:false with json_parse when response has no JSON object", async () => {
+    const server = await startMockLLM({ responseContent: "Just plain text answer" }); // no "{" / "}" anywhere
+    try {
+      const pi = createOpenAICompatiblePiAdapter({
+        baseUrl: server.baseUrl,
+        apiKey: "test",
+        defaultModel: "fast",
+      });
+      const result = await pi.chat({
+        message: "hi",
+        model: "fast",
+        docs: [],
+        availableActions: [],
+      });
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.kind).toBe("json_parse");
+        expect(result.fallback.answer).toBe("Just plain text answer"); // raw text is preserved for the caller
+      }
+    } finally {
+      await server.stop();
+    }
+  });
+
+  it("returns ok:true when response is well-formed JSON", async () => {
+    const server = await startMockLLM({
+      responseContent: JSON.stringify({ // the three-key shape the system prompt asks for
+        answer: "All good",
+        recommended_actions: [],
+        internal_docs: [],
+      }),
+    });
+    try {
+      const pi = createOpenAICompatiblePiAdapter({
+        baseUrl: server.baseUrl,
+        apiKey: "test",
+        defaultModel: "fast",
+      });
+      const result = await pi.chat({
+        message: "hi",
+        model: "fast",
+        docs: [],
+        availableActions: [],
+      });
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.answer).toBe("All good");
+      }
+    } finally {
+      await server.stop();
+    }
+  });
+
+  it("throws on non-OK HTTP response (transport error, not parse error)", async () => {
+    const server = await startMockLLM({ status: 500, responseContent: "" }); // HTTP failure must reject, not resolve with ok:false
+    try {
+      const pi = createOpenAICompatiblePiAdapter({
+        baseUrl: server.baseUrl,
+        apiKey: "test",
+        defaultModel: "fast",
+      });
+      await expect(
+        pi.chat({
+          message: "hi",
+          model: "fast",
+          docs: [],
+          availableActions: [],
+        }),
+      ).rejects.toThrow(/llm_request_failed:500/);
+    } finally {
+      await server.stop();
+    }
+  });
+
+  it("filters recommended_actions to known ids and clamps scores", async () => {
+    const server = await startMockLLM({
+      responseContent: JSON.stringify({
+        answer: "ok",
+        recommended_actions: [
+          { type: "webhook", id: "dns-flush", confidence: 2.5, reason: "x" }, // known id; confidence is out of range
+          { type: "webhook", id: "unknown-id", confidence: 0.9, reason: "y" }, // id absent from availableActions
+        ],
+        internal_docs: [],
+      }),
+    });
+    try {
+      const pi = createOpenAICompatiblePiAdapter({
+        baseUrl: server.baseUrl,
+        apiKey: "test",
+        defaultModel: "fast",
+      });
+      const result = await pi.chat({
+        message: "hi",
+        model: "fast",
+        docs: [],
+        availableActions: [
+          { type: "webhook", id: "dns-flush", confidence: 0, reason: "r", requires_confirmation: true },
+        ],
+      });
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.recommended_actions).toHaveLength(1); // "unknown-id" was dropped
+        expect(result.result.recommended_actions[0]?.id).toBe("dns-flush");
+        expect(result.result.recommended_actions[0]?.confidence).toBe(1); // 2.5 clamped to the upper bound
+      }
+    } finally {
+      await server.stop();
+    }
+  });
+});
+
+import { createServer, type Server } from "node:http";
+
+// Test double for an OpenAI-compatible server. Listens on an ephemeral
+// 127.0.0.1 port and answers every request with one chat.completion whose
+// message content is `opts.responseContent`, using `opts.status` (default
+// 200) as the HTTP status. `stop()` resolves once the server has closed.
+async function startMockLLM(opts: { responseContent: string; status?: number }): Promise<{
+  baseUrl: string;
+  stop: () => Promise<void>;
+}> {
+  const server: Server = createServer((_req, res) => {
+    const completion = {
+      id: "mock",
+      object: "chat.completion",
+      created: 0,
+      model: "fast",
+      choices: [
+        { index: 0, message: { role: "assistant", content: opts.responseContent }, finish_reason: "stop" },
+      ],
+    };
+    res.writeHead(opts.status ?? 200, { "content-type": "application/json" });
+    res.end(JSON.stringify(completion));
+  });
+
+  // Port 0 asks the OS for a free port; wait for the listener to be up
+  // before reading the assigned port back.
+  await new Promise<void>((ready) => {
+    server.listen(0, "127.0.0.1", () => ready());
+  });
+
+  const bound = server.address();
+  const port = bound && typeof bound === "object" ? bound.port : 0;
+
+  const stop = () =>
+    new Promise<void>((closed) => {
+      server.close(() => closed());
+    });
+  return { baseUrl: `http://127.0.0.1:${port}/v1`, stop };
+}
--- a/packages/pi-adapter/tsconfig.json
+++ b/packages/pi-adapter/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src"]
+}
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "@pi-chat/shared",
+  "private": true,
+  "version": "0.1.0",
+  "type": "module",
+  "exports": {
+    ".": "./src/index.ts"
+  },
+  "scripts": {
+    "typecheck": "tsc --noEmit",
+    "lint": "tsc --noEmit"
+  },
+  "devDependencies": {
+    "typescript": "^5.8.3"
+  }
+}
--- a/packages/shared/src/index.ts
+++ b/packages/shared/src/index.ts
@@ -0,0 +1,33 @@
+export type AuthUser = { // Authenticated principal; its consumers are outside this excerpt.
+  id: string;
+  username?: string;
+  email?: string;
+  roles: string[]; // presumably role names such as "admin" / "webhook-runner" (per the commit message) — TODO confirm against apps/api
+};
+
+export type RecommendedAction = { // An action the assistant may suggest; the same shape describes the caller-supplied available actions.
+  type: "webhook"; // the only action kind declared here
+  id: string; // pi-adapter drops model suggestions whose id is not among the available actions
+  confidence: number; // pi-adapter clamps model-supplied values to [0, 1]
+  reason: string;
+  requires_confirmation: boolean; // pi-adapter defaults this to true when no value is available
+  /**
+   * Optional soft signal derived from the user's audit history: e.g.
+   * "3 runs in last 7d, 100% success". The LLM may use this as a tiebreaker;
+   * the UI uses it to show a "Most used" tag.
+   */
+  usageHint?: string; // note: pi-adapter's normalizeResult does not copy this field into its output
+};
+
+export type InternalDocReference = { // Pointer to a knowledge document offered to, and cited back by, the model.
+  id: string; // e.g. "runbooks:vpn" in pi-adapter's prompt example
+  title: string;
+  source: string; // e.g. "runbooks/vpn.md" in pi-adapter's prompt example
+  relevance: number; // pi-adapter clamps model-supplied values to [0, 1]
+};
+
+export type ChatResult = { // The three-key reply shape that pi-adapter's system prompt asks the model to produce.
+  answer: string; // may contain markdown (tables/lists/code), per the system prompt
+  recommended_actions: RecommendedAction[];
+  internal_docs: InternalDocReference[];
+};
--- a/packages/shared/tsconfig.json
+++ b/packages/shared/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src"]
+}