Initial commit: SIC harness (backend, web, pi-adapter, configs, docs)

- pnpm monorepo: apps/api (Fastify + SQLite + SSE), apps/web (React+Vite), packages/shared, packages/pi-adapter
- Local auth (admin/webhook-runner roles) + Keycloak JWT ready
- Multi-session chat with reliable history (user persisted before LLM, assistant persisted after stream)
- Markdown knowledge base with /api/docs/search + /api/docs/:id
- YAML webhook catalog with backend-only execution, retry/backoff, audit (webhook_runs), and per-user rate limit
- Skills config (sre-on-call, blameless-postmortem, security-incident) injected into LLM system prompt
- LLM provider failover chain (config/models.yml fallback + LLM_FALLBACK_CHAIN override)
- Context-aware webhooks panel + backend id-mention safety net
- Per-message stats (time/duration/tokens/model), Markdown+GFM render, code & table copy/download buttons
- Vitest suite, end-to-end smoke test (scripts/smoke.mjs), per-session system prompt override
- /metrics Prometheus endpoint + /api/metrics JSON, request-id correlation
- dotenv with explicit repo-root path; envString/envNumber helpers (handles empty-string env)
- Runbooks + SOPs under knowledge/ in English; README, docs, and INDEX.md in English
This commit is contained in:
2026-06-29 16:20:53 +02:00
commit 62728b2200
89 changed files with 11992 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
{
"name": "@pi-chat/pi-adapter",
"private": true,
"version": "0.1.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"typecheck": "tsc --noEmit",
"lint": "tsc --noEmit"
},
"dependencies": {
"@pi-chat/shared": "workspace:*"
},
"devDependencies": {
"typescript": "^5.8.3"
}
}

View File

@@ -0,0 +1,354 @@
import type { ChatResult, InternalDocReference, RecommendedAction } from "@pi-chat/shared";
/**
 * Input for a single `PiAdapter.chat` call.
 */
export type PiChatInput = {
// Latest user message; sent as the final `user` entry of the request.
message: string;
// Model id to request. The OpenAI-compatible adapter falls back to its
// configured `defaultModel` when this is an empty string.
model: string;
// Candidate documents. They are serialized into a system message for the
// model and also act as the allow-list used to validate the `internal_docs`
// ids the model returns.
docs: InternalDocReference[];
// Actions the model may recommend. Serialized into the same system message
// and used as the allow-list for returned `recommended_actions` ids.
availableActions: RecommendedAction[];
// Prior conversation turns, replayed in order before `message`. The
// OpenAI-compatible adapter forwards "tool" entries with the "assistant" role.
history?: Array<{
role: "user" | "assistant" | "system" | "tool";
content: string;
}>;
// Skill prompts; each one becomes its own system message right after the
// base identity prompt.
skillPrompts?: string[];
// Optional per-session system prompt override. The OpenAI-compatible adapter
// trims it, skips it when null or blank, and sends it as a system message
// after the base identity prompt and the skill prompts, immediately before
// the docs/actions context message.
systemPrompt?: string | null;
};
/**
 * Minimal chat backend contract: one request in, one structured result out.
 */
export interface PiAdapter {
// Resolves to a PiChatResult; parse problems are reported through the
// `ok: false` variant rather than by throwing. The OpenAI-compatible
// implementation in this file does reject on non-OK HTTP responses.
chat(input: PiChatInput): Promise<PiChatResult>;
}
/**
 * Reasons why the model's message could not be turned into a ChatResult.
 * `rawContent` carries at most the first 500 characters of the raw message.
 */
export type PiChatError =
// No `{ ... }` span was found, or JSON.parse failed and no answer could be salvaged.
| { kind: "json_parse"; reason: string; rawContent: string }
// JSON parsed, but the top-level value was not an object.
| { kind: "schema"; reason: string; rawContent: string }
// The message was empty or whitespace-only.
| { kind: "no_content"; message: string };
/**
 * Token accounting for one chat call, in camelCase. Every field is optional
 * because providers may omit any of them.
 */
export type PiChatUsage = {
// From `usage.prompt_tokens`.
promptTokens?: number;
// From `usage.completion_tokens`.
completionTokens?: number;
// From `usage.total_tokens`.
totalTokens?: number;
// From `usage.prompt_tokens_details.cached_tokens`.
cachedTokens?: number;
// NOTE(review): not populated by `extractUsage` in this file; presumably
// filled in by the caller that times the request — confirm.
durationMs?: number;
};
/**
 * Outcome of a chat call, discriminated on `ok`.
 * - ok: true  -> the model output was parsed (or salvaged) into `result`.
 * - ok: false -> parsing failed; `error` says why and `fallback` is a
 *   ChatResult that can still be shown (the OpenAI-compatible adapter fills
 *   it with the raw message text, no actions, and the input docs).
 * `usage` is set only when the provider reported at least one token count.
 */
export type PiChatResult =
| { ok: true; result: ChatResult; usage?: PiChatUsage }
| { ok: false; error: PiChatError; fallback: ChatResult; usage?: PiChatUsage };
/**
 * Configuration for `createOpenAICompatiblePiAdapter`.
 */
export type OpenAICompatiblePiAdapterOptions = {
// API root; one trailing slash is stripped and `/chat/completions` is appended.
baseUrl: string;
// Sent as `authorization: Bearer <apiKey>`.
apiKey: string;
// Model used when the chat input does not name one.
defaultModel: string;
// Abort timer for the request, in milliseconds. Defaults to 30000.
timeoutMs?: number;
// Value sent as `max_tokens`. Defaults to 4096.
maxTokens?: number;
};
// Subset of an OpenAI-style chat completion response body that this module
// reads. Every field is optional because the body is cast from untyped JSON
// without validation.
type OpenAIChatResponse = {
// Only the first choice's message content is consumed.
choices?: Array<{
message?: {
content?: string;
};
}>;
// Provider token accounting (snake_case); mapped to PiChatUsage.
usage?: {
prompt_tokens?: number;
completion_tokens?: number;
total_tokens?: number;
prompt_tokens_details?: { cached_tokens?: number };
};
};
// Coerces a model-supplied score into the closed range [0, 1].
// Anything that is not a real number (wrong type or NaN) yields `fallback`
// unchanged; numbers outside the range are pinned to the nearest bound.
const score = (value: unknown, fallback: number) => {
  if (typeof value === "number" && !Number.isNaN(value)) {
    if (value <= 0) return 0;
    return value >= 1 ? 1 : value;
  }
  return fallback;
};
// Extract an answer string from a parsed JSON object regardless of which key
// the model picked.
//
// Resolution order:
//   1. The first preferred key ("answer", "response", ...) holding a
//      non-blank string; that value is returned as-is (untrimmed).
//   2. Otherwise the longest non-blank string value among the remaining
//      keys, trimmed. On a tie the earliest key wins.
//
// The structural keys `recommended_actions` and `internal_docs` are never
// used as the answer. They are excluded by key name: comparing by value would
// also discard an unrelated key that happens to hold the same text.
//
// Returns null if `parsed` isn't an object or has no usable string field.
const extractAnswer = (parsed: unknown): string | null => {
  if (!parsed || typeof parsed !== "object") return null;
  const obj = parsed as Record<string, unknown>;

  const preferredKeys = ["answer", "response", "output", "text", "content", "message", "result"];
  for (const key of preferredKeys) {
    const value = obj[key];
    if (typeof value === "string" && value.trim().length > 0) return value;
  }

  const structuralKeys = new Set(["recommended_actions", "internal_docs"]);
  let longest: string | null = null;
  for (const [key, value] of Object.entries(obj)) {
    if (structuralKeys.has(key) || typeof value !== "string") continue;
    const trimmed = value.trim();
    if (trimmed.length === 0) continue;
    if (longest === null || trimmed.length > longest.length) longest = trimmed;
  }
  return longest;
};
// Turns loosely-shaped model output into a strict ChatResult.
//
// - answer: whatever `extractAnswer` finds, else `fallbackAnswer`.
// - recommended_actions: only ids present in `availableActions` survive.
//   Items may be objects or bare id strings ("loose mode"). A missing or
//   non-array field yields an empty list.
// - internal_docs: only ids present in `docs` survive; objects or bare id
//   strings are accepted. A missing or non-array field falls back to `docs`.
//
// Model output is untrusted, so it may only tighten the confirmation gate:
// an action configured with `requires_confirmation: true` stays gated even if
// the model says `false`, and non-boolean values from the model are ignored.
const normalizeResult = (
  parsed: unknown,
  fallbackAnswer: string,
  docs: InternalDocReference[],
  availableActions: RecommendedAction[],
): ChatResult => {
  const docsById = new Map(docs.map((doc) => [doc.id, doc]));
  const actionsById = new Map(availableActions.map((action) => [action.id, action]));
  const obj = (parsed && typeof parsed === "object" ? parsed : {}) as Record<string, unknown>;

  // Maps one raw entry to zero or one validated action (flatMap-friendly).
  const toActions = (raw: unknown): RecommendedAction[] => {
    // Loose mode: a bare string is treated as an action id.
    if (typeof raw === "string") {
      const configured = actionsById.get(raw);
      if (!configured) return [];
      return [{
        type: configured.type ?? ("webhook" as const),
        id: raw,
        confidence: 0,
        reason: configured.reason ?? raw,
        requires_confirmation: configured.requires_confirmation ?? true,
      }];
    }
    if (!raw || typeof raw !== "object") return [];
    const candidate = raw as Partial<RecommendedAction>;
    if (!candidate.id) return [];
    const configured = actionsById.get(candidate.id);
    if (!configured) return [];
    const configuredGate = configured.requires_confirmation ?? true;
    return [{
      type: "webhook" as const,
      id: candidate.id,
      confidence: score(candidate.confidence, 0),
      reason: String(candidate.reason ?? configured.reason ?? "Suggested action"),
      // The model can add a confirmation step but never remove a configured one.
      requires_confirmation: configuredGate || candidate.requires_confirmation === true,
    }];
  };

  // Maps one raw entry to zero or one validated document reference.
  const toDocs = (raw: unknown): InternalDocReference[] => {
    // Loose mode: a bare string is treated as a document id.
    if (typeof raw === "string") {
      const indexed = docsById.get(raw);
      if (!indexed) return [];
      return [{ id: raw, title: indexed.title, source: indexed.source, relevance: 0 }];
    }
    if (!raw || typeof raw !== "object") return [];
    const candidate = raw as Partial<InternalDocReference>;
    if (!candidate.id) return [];
    const indexed = docsById.get(candidate.id);
    if (!indexed) return [];
    return [{
      id: candidate.id,
      title: String(candidate.title ?? indexed.title),
      source: String(candidate.source ?? indexed.source),
      relevance: score(candidate.relevance, indexed.relevance),
    }];
  };

  return {
    answer: extractAnswer(parsed) ?? fallbackAnswer,
    recommended_actions: Array.isArray(obj.recommended_actions)
      ? (obj.recommended_actions as unknown[]).flatMap(toActions)
      : [],
    internal_docs: Array.isArray(obj.internal_docs)
      ? (obj.internal_docs as unknown[]).flatMap(toDocs)
      : docs,
  };
};
// Parses the raw assistant message into a ChatResult without ever throwing.
//
// Steps:
//   1. Blank input                      -> { ok: false, kind: "no_content" }.
//   2. No `{ ... }` span in the text    -> { ok: false, kind: "json_parse" }.
//   3. JSON.parse of the outermost span (first "{" to last "}"). On failure,
//      try to recover just the answer text; if that works the result is
//      ok with no recommended actions and the caller's docs.
//   4. Parsed value that is not an object -> { ok: false, kind: "schema" }.
//   5. Otherwise normalize against the known docs and actions, using the
//      whole raw message as the fallback answer.
//
// Error variants carry at most the first 500 characters of the raw message.
const safeJsonParse = (
  value: string,
  docs: InternalDocReference[],
  availableActions: RecommendedAction[],
): { ok: true; result: ChatResult } | { ok: false; error: PiChatError } => {
  const isBlank = !value || value.trim().length === 0;
  if (isBlank) {
    return {
      ok: false,
      error: { kind: "no_content", message: "LLM returned an empty message." },
    };
  }

  const firstBrace = value.indexOf("{");
  const lastBrace = value.lastIndexOf("}");
  const hasObjectSpan = firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace;
  if (!hasObjectSpan) {
    return {
      ok: false,
      error: {
        kind: "json_parse",
        reason: "no JSON object delimiters found in response",
        rawContent: value.slice(0, 500),
      },
    };
  }

  const candidateJson = value.slice(firstBrace, lastBrace + 1);
  let decoded: unknown = null;
  try {
    decoded = JSON.parse(candidateJson);
  } catch {
    // Local models (vLLM / ollama) frequently cut their output mid-array
    // when max_tokens collides with the real context budget, so a parse
    // failure usually means truncation. Recover the answer text if we can.
    const rescued = salvageAnswer(candidateJson);
    return rescued === null
      ? {
          ok: false,
          error: {
            kind: "json_parse",
            reason: "JSON.parse failed on truncated or malformed output",
            rawContent: value.slice(0, 500),
          },
        }
      : {
          ok: true,
          result: {
            answer: rescued,
            recommended_actions: [],
            internal_docs: docs,
          },
        };
  }

  const isObject = decoded !== null && typeof decoded === "object";
  if (!isObject) {
    return {
      ok: false,
      error: {
        kind: "schema",
        reason: "parsed value is not an object",
        rawContent: value.slice(0, 500),
      },
    };
  }

  return { ok: true, result: normalizeResult(decoded, value, docs, availableActions) };
};
// Best-effort regex extraction of the answer field from truncated JSON.
//
// Looks for the first complete `"key": "..."` pair whose key is one of the
// usual answer names (quotes around the key are optional, to tolerate quirky
// local models) and returns its decoded string value. Keys are tried in
// priority order; a key whose value is blank is skipped. Returns null when no
// candidate key has a complete, non-blank string value.
//
// Escapes are decoded in a single left-to-right pass. Chained `.replace()`
// calls are order-dependent and mis-decode inputs such as `\\n` (an escaped
// backslash followed by the letter n) into a backslash plus a newline.
// Unknown escape sequences are left untouched.
const salvageAnswer = (slice: string): string | null => {
  // Try the canonical key first, then common variants seen from local models.
  const candidates = ["answer", "response", "output", "text", "content", "message", "result"];

  // JSON's single-character escapes (RFC 8259, section 7).
  const simpleEscapes = new Map<string, string>([
    ['"', '"'],
    ["\\", "\\"],
    ["/", "/"],
    ["b", "\b"],
    ["f", "\f"],
    ["n", "\n"],
    ["r", "\r"],
    ["t", "\t"],
  ]);

  const decodeEscapes = (raw: string): string =>
    raw.replace(/\\(u[0-9a-fA-F]{4}|.)/g, (whole: string, esc: string) => {
      // `uXXXX` form: one UTF-16 code unit; surrogate pairs recombine naturally.
      if (esc.length === 5) return String.fromCharCode(parseInt(esc.slice(1), 16));
      return simpleEscapes.get(esc) ?? whole;
    });

  for (const key of candidates) {
    const re = new RegExp(`["']?${key}["']?\\s*:\\s*"((?:[^"\\\\]|\\\\.)*)"`);
    const m = slice.match(re);
    if (m && m[1] && m[1].trim().length > 0) {
      return decodeEscapes(m[1]);
    }
  }
  return null;
};
/**
 * Builds a PiAdapter that talks to any OpenAI-compatible
 * `POST {baseUrl}/chat/completions` endpoint.
 *
 * Message order sent to the model: base identity/rules prompt, one system
 * message per skill prompt, the optional per-session system prompt, a system
 * message with the JSON-serialized docs and available actions, the history
 * ("tool" entries are sent as "assistant"), and finally the user message.
 *
 * @param options Endpoint, credentials, default model, and optional
 *   `timeoutMs` (default 30000) / `maxTokens` (default 4096).
 * @returns An adapter whose `chat` resolves to `ok: true` with the normalized
 *   result, or `ok: false` with a structured parse error plus a displayable
 *   fallback (raw text, no actions, the input docs).
 * @throws Error `llm_request_failed:<status>` on a non-OK HTTP response.
 *   Network failures, aborts (timeout) and non-JSON bodies reject with the
 *   underlying error.
 */
export const createOpenAICompatiblePiAdapter = (options: OpenAICompatiblePiAdapterOptions): PiAdapter => ({
  async chat(input) {
    const abortController = new AbortController();
    const timeout = setTimeout(() => abortController.abort(), options.timeoutMs ?? 30_000);
    let data: OpenAIChatResponse;
    try {
      const response = await fetch(`${options.baseUrl.replace(/\/$/, "")}/chat/completions`, {
        method: "POST",
        headers: {
          authorization: `Bearer ${options.apiKey}`,
          "content-type": "application/json",
        },
        signal: abortController.signal,
        body: JSON.stringify({
          model: input.model || options.defaultModel,
          // Always pin max_tokens to avoid hitting the cap mid-JSON and
          // emitting truncated output. Real providers honor this; servers
          // that cap harder will return a finish_reason of "length" but
          // we'll still get usable text.
          max_tokens: options.maxTokens ?? 4096,
          // Request a single JSON response. Some OpenAI-compatible servers
          // (e.g. local proxies like mr-auto) default to SSE streaming, and
          // `response.json()` below would blow up on the chunked stream.
          // Real providers like MiniMax accept and ignore this flag.
          stream: false,
          // Force JSON output for all OpenAI-compatible backends. Local
          // foundation models (qwen-local behind llm.rikrdo.com) often
          // ignore the "ALWAYS respond with JSON" system instruction and
          // default to natural language — this flag tells the server to
          // constrain the output to a JSON object. Real providers accept
          // and honor it.
          response_format: { type: "json_object" },
          messages: [
            {
              role: "system",
              content:
                "You are SIC (Super Incident Commander), an internal incident management assistant. " +
                "STRICT RULES: " +
                "1) Your reply MUST be a single JSON object (no markdown, no prose wrapper) with EXACTLY three keys: " +
                " - \"answer\" (string): the response to the user, may include markdown for tables/lists/code. " +
                " - \"recommended_actions\" (array): each item MUST be an object with keys id (string, present in available_actions), confidence (number 0..1), reason (string), and \"type\": \"webhook\". " +
                " - \"internal_docs\" (array): each item MUST be an object with keys id, title, source, relevance (number 0..1). " +
                " Example shape: {\"answer\": \"...\", \"recommended_actions\": [{\"id\":\"vpn-diagnostic\",\"type\":\"webhook\",\"confidence\":0.8,\"reason\":\"matches VPN symptoms\"}], \"internal_docs\": [{\"id\":\"runbooks:vpn\",\"title\":\"VPN Runbook\",\"source\":\"runbooks/vpn.md\",\"relevance\":0.9}]}. " +
                "2) recommended_actions may only include ids present in available_actions; never execute actions and never invent ids. The backend executes with confirmation. " +
                "3) DO NOT invent company names, owners, integrations, customers, or external facts. If the user asks something not backed by internal_docs, available_actions, or the history, say explicitly that you do not have that information. " +
                "4) When asked who you are or what company you belong to, only state that you are SIC (Super Incident Commander), an internal assistant; do not assume an owning company. " +
                "5) Whenever an available_action is contextually relevant to the user's request OR the user asks which actions exist, include its id in recommended_actions so the user can see and execute it from the right panel. The right panel renders ONLY items present in recommended_actions, so omitting them hides them.",
            },
            ...(input.skillPrompts ?? []).map((prompt) => ({
              role: "system" as const,
              content: prompt,
            })),
            ...(input.systemPrompt && input.systemPrompt.trim().length > 0
              ? [{ role: "system" as const, content: input.systemPrompt.trim() }]
              : []),
            {
              role: "system",
              content: JSON.stringify({
                internal_docs: input.docs,
                available_actions: input.availableActions,
              }),
            },
            ...(input.history ?? []).map((message) => ({
              role: message.role === "tool" ? "assistant" : message.role,
              content: message.content,
            })),
            { role: "user", content: input.message },
          ],
          temperature: 0.2,
        }),
      });
      if (!response.ok) {
        throw new Error(`llm_request_failed:${response.status}`);
      }
      // Read the body while the abort timer is still armed: `fetch` resolves
      // as soon as headers arrive, so clearing the timer earlier would leave
      // a stalled body download without any timeout.
      data = (await response.json()) as OpenAIChatResponse;
    } finally {
      clearTimeout(timeout);
    }

    // Some servers send `content: null` or a non-string payload; treat all of
    // those as an empty message so they surface as a structured `no_content`
    // error instead of a TypeError inside the parser.
    const rawContent: unknown = data.choices?.[0]?.message?.content;
    const content = typeof rawContent === "string" ? rawContent : "";
    const parsed = safeJsonParse(content, input.docs, input.availableActions);
    const usage = extractUsage(data.usage);
    if (parsed.ok) {
      return { ok: true, result: parsed.result, usage };
    }
    return {
      ok: false,
      error: parsed.error,
      fallback: {
        answer: content,
        recommended_actions: [],
        internal_docs: input.docs,
      },
      usage,
    };
  },
});
// Converts the provider's snake_case `usage` object into a PiChatUsage.
// Only values that are actually numbers are kept; everything else becomes
// undefined. When not a single counter is available the whole result is
// undefined, so callers can omit usage altogether.
const extractUsage = (raw: OpenAIChatResponse["usage"]): PiChatUsage | undefined => {
  if (!raw || typeof raw !== "object") return undefined;

  const numberOrUndefined = (candidate: unknown): number | undefined =>
    typeof candidate === "number" ? candidate : undefined;

  const usage = {
    promptTokens: numberOrUndefined(raw.prompt_tokens),
    completionTokens: numberOrUndefined(raw.completion_tokens),
    totalTokens: numberOrUndefined(raw.total_tokens),
    cachedTokens: numberOrUndefined(raw.prompt_tokens_details?.cached_tokens),
  };

  const hasAnyCounter = Object.values(usage).some((counter) => counter !== undefined);
  return hasAnyCounter ? usage : undefined;
};

View File

@@ -0,0 +1,181 @@
import { describe, expect, it } from "vitest";
import {
createOpenAICompatiblePiAdapter,
} from "../src/index.js";
// Exercises the adapter end-to-end against a local mock LLM server and checks
// how each kind of upstream reply is classified.
describe("pi-adapter structured errors", () => {
  type Adapter = ReturnType<typeof createOpenAICompatiblePiAdapter>;

  // Boots a throwaway mock LLM, hands an adapter pointed at it to `run`, and
  // always shuts the server down afterwards, even when an assertion fails.
  const withAdapter = async (
    mock: { responseContent: string; status?: number },
    run: (pi: Adapter) => Promise<void>,
  ): Promise<void> => {
    const server = await startMockLLM(mock);
    try {
      const pi = createOpenAICompatiblePiAdapter({
        baseUrl: server.baseUrl,
        apiKey: "test",
        defaultModel: "fast",
      });
      await run(pi);
    } finally {
      await server.stop();
    }
  };

  it("returns ok:false with no_content when LLM returns empty", () =>
    withAdapter({ responseContent: "" }, async (pi) => {
      const result = await pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      expect(result.ok).toBe(false);
      if (!result.ok) {
        expect(result.error.kind).toBe("no_content");
        expect(result.fallback.answer).toBe("");
        expect(result.fallback.recommended_actions).toEqual([]);
      }
    }));

  it("returns ok:false with json_parse when response has no JSON object", () =>
    withAdapter({ responseContent: "Just plain text answer" }, async (pi) => {
      const result = await pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      expect(result.ok).toBe(false);
      if (!result.ok) {
        expect(result.error.kind).toBe("json_parse");
        expect(result.fallback.answer).toBe("Just plain text answer");
      }
    }));

  it("returns ok:true when response is well-formed JSON", () => {
    const responseContent = JSON.stringify({
      answer: "All good",
      recommended_actions: [],
      internal_docs: [],
    });
    return withAdapter({ responseContent }, async (pi) => {
      const result = await pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] });
      expect(result.ok).toBe(true);
      if (result.ok) {
        expect(result.result.answer).toBe("All good");
      }
    });
  });

  it("throws on non-OK HTTP response (transport error, not parse error)", () =>
    withAdapter({ status: 500, responseContent: "" }, async (pi) => {
      await expect(
        pi.chat({ message: "hi", model: "fast", docs: [], availableActions: [] }),
      ).rejects.toThrow(/llm_request_failed:500/);
    }));

  it("filters recommended_actions to known ids and clamps scores", () => {
    // One known id with an out-of-range confidence, one id the backend
    // does not know about.
    const responseContent = JSON.stringify({
      answer: "ok",
      recommended_actions: [
        { type: "webhook", id: "dns-flush", confidence: 2.5, reason: "x" },
        { type: "webhook", id: "unknown-id", confidence: 0.9, reason: "y" },
      ],
      internal_docs: [],
    });
    return withAdapter({ responseContent }, async (pi) => {
      const result = await pi.chat({
        message: "hi",
        model: "fast",
        docs: [],
        availableActions: [
          { type: "webhook", id: "dns-flush", confidence: 0, reason: "r", requires_confirmation: true },
        ],
      });
      expect(result.ok).toBe(true);
      if (result.ok) {
        const [first] = result.result.recommended_actions;
        expect(result.result.recommended_actions).toHaveLength(1);
        expect(first?.id).toBe("dns-flush");
        expect(first?.confidence).toBe(1);
      }
    });
  });
});
import { createServer, type Server } from "node:http";
/**
 * Starts a loopback HTTP server that answers every request with a canned
 * OpenAI-style chat completion.
 *
 * @param opts.responseContent Text placed in `choices[0].message.content`.
 * @param opts.status HTTP status to reply with (default 200). The JSON body
 *   is sent regardless of the status.
 * @returns `baseUrl` (ending in `/v1`, on an OS-assigned port) and `stop`,
 *   which closes the server and resolves once it has shut down.
 * @throws Rejects if the server cannot start listening or does not report a
 *   TCP port, instead of leaving the returned promise pending forever.
 */
async function startMockLLM(opts: { responseContent: string; status?: number }): Promise<{
  baseUrl: string;
  stop: () => Promise<void>;
}> {
  const server: Server = createServer((_req, res) => {
    res.writeHead(opts.status ?? 200, { "content-type": "application/json" });
    res.end(
      JSON.stringify({
        id: "mock",
        object: "chat.completion",
        created: 0,
        model: "fast",
        choices: [
          {
            index: 0,
            message: { role: "assistant", content: opts.responseContent },
            finish_reason: "stop",
          },
        ],
      }),
    );
  });

  return await new Promise((resolve, reject) => {
    // Surface startup failures to the caller.
    server.once("error", reject);
    // Port 0 lets the OS pick a free port, so parallel tests never collide.
    server.listen(0, "127.0.0.1", () => {
      server.off("error", reject);
      const address = server.address();
      if (address === null || typeof address === "string") {
        server.close();
        reject(new Error("mock LLM server did not bind to a TCP port"));
        return;
      }
      resolve({
        baseUrl: `http://127.0.0.1:${address.port}/v1`,
        stop: () =>
          new Promise<void>((done) => {
            server.close(() => done());
          }),
      });
    });
  });
}

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"]
}

View File

@@ -0,0 +1,16 @@
{
"name": "@pi-chat/shared",
"private": true,
"version": "0.1.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"typecheck": "tsc --noEmit",
"lint": "tsc --noEmit"
},
"devDependencies": {
"typescript": "^5.8.3"
}
}

View File

@@ -0,0 +1,33 @@
/**
 * Identity of an authenticated caller.
 * NOTE(review): presumably populated by the API's auth layer; the producer is
 * not visible from this package — confirm.
 */
export type AuthUser = {
// Identifier of the user.
id: string;
// Optional display attributes; either may be absent.
username?: string;
email?: string;
// Role names granted to the user; may be empty.
roles: string[];
};
/**
 * An action that can be suggested to the user.
 */
export type RecommendedAction = {
// Discriminator; "webhook" is the only kind declared here.
type: "webhook";
// Identifier of the action.
id: string;
// Confidence score for the suggestion.
// NOTE(review): intended range looks like 0..1, but the type does not enforce it.
confidence: number;
// Human-readable justification for suggesting the action.
reason: string;
// Whether the user must confirm before the action is executed.
requires_confirmation: boolean;
/**
 * Optional soft signal derived from the user's audit history: e.g.
 * "3 runs in last 7d, 100% success". The LLM may use this as a tiebreaker;
 * the UI uses it to show a "Most used" tag.
 */
usageHint?: string;
};
/**
 * Pointer to an internal knowledge-base document.
 */
export type InternalDocReference = {
// Identifier of the document.
id: string;
// Human-readable title.
title: string;
// Where the document comes from.
// NOTE(review): presumably a path within the knowledge base — confirm.
source: string;
// Relevance score for the current request.
// NOTE(review): intended range looks like 0..1, but the type does not enforce it.
relevance: number;
};
/**
 * Structured reply of one chat turn.
 */
export type ChatResult = {
// Text shown to the user.
answer: string;
// Actions suggested for this turn; may be empty.
recommended_actions: RecommendedAction[];
// Documents referenced for this turn; may be empty.
internal_docs: InternalDocReference[];
};

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"]
}