arete-org
diff --git a/‎packages/backend/src/shared/prompts/defaults.yaml‎
Lines changed: 218 additions & 218 deletions b/‎packages/backend/src/shared/prompts/defaults.yaml‎
Lines changed: 218 additions & 218 deletions
diff --git a/‎packages/discord-bot/package.json‎
Lines changed: 7 additions & 0 deletions b/‎packages/discord-bot/package.json‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎packages/discord-bot/src/filters/inbound/index.ts‎
Lines changed: 17 additions & 0 deletions b/‎packages/discord-bot/src/filters/inbound/index.ts‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎packages/discord-bot/src/filters/outbound/index.ts‎
Lines changed: 62 additions & 0 deletions b/‎packages/discord-bot/src/filters/outbound/index.ts‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎packages/discord-bot/src/filters/outbound/normalizeLinks.ts‎
Lines changed: 214 additions & 0 deletions b/‎packages/discord-bot/src/filters/outbound/normalizeLinks.ts‎
Lines changed: 214 additions & 0 deletions
diff --git a/‎packages/discord-bot/src/filters/outbound/types.ts‎
Lines changed: 15 additions & 0 deletions b/‎packages/discord-bot/src/filters/outbound/types.ts‎
Lines changed: 15 additions & 0 deletions
@@ -32,20 +32,27 @@
         "express": "catalog:",
         "flyio": "catalog:",
         "js-yaml": "catalog:",
+        "linkify-it": "^5.0.0",
         "mime-types": "catalog:",
         "node-fetch": "catalog:",
         "only": "catalog:",
         "openai": "catalog:",
         "opusscript": "catalog:",
         "prism-media": "catalog:",
         "reindex": "catalog:",
+        "remark-parse": "^11.0.0",
+        "unified": "^11.0.5",
+        "unist-util-visit": "^5.0.0",
         "winston": "catalog:",
         "ws": "catalog:"
     },
     "devDependencies": {
         "@types/body-parser": "catalog:",
         "@types/express": "catalog:",
+        "@types/linkify-it": "^5.0.0",
+        "@types/mdast": "^4.0.4",
         "@types/node": "catalog:",
+        "@types/unist": "^3.0.3",
         "copyfiles": "catalog:",
         "cross-env": "catalog:",
         "rimraf": "catalog:",
 
@@ -0,0 +1,17 @@
+/**
+ * @description: Placeholder entry point for inbound filters (pre-processing) before core logic.
+ * @arete-scope: interface
+ * @arete-module: InboundFilters
+ * @arete-risk: low - No active filters means no behavioral changes yet.
+ * @arete-ethics: low - Placeholder does not alter user content.
+ */
+
+/**
+ * Shape returned by the inbound filter entry point.
+ */
+export interface InboundFilterResult {
+    // Message text after inbound filtering.
+    content: string;
+    // Descriptions of the edits that were made; runInboundFilters currently always returns an empty list.
+    changes: string[];
+}
+
+/**
+ * Inbound filter entry point. No filters are registered yet, so the input is
+ * handed back untouched together with an empty change list.
+ *
+ * @param content - Incoming message text.
+ * @returns The same text and an empty list of changes.
+ */
+export const runInboundFilters = (content: string): InboundFilterResult => ({
+    content,
+    changes: [],
+});
@@ -0,0 +1,62 @@
+/**
+ * @description: Runs outbound message filters before content is sent to Discord.
+ * @arete-scope: interface
+ * @arete-module: OutboundFilters
+ * @arete-risk: moderate - Filter failures could distort messages or degrade formatting.
+ * @arete-ethics: moderate - Outbound normalization influences transparency and user trust.
+ */
+
+import { logger } from '../../utils/logger.js';
+import { normalizeOutboundLinks } from './normalizeLinks.js';
+import type { OutboundFilter, OutboundFilterResult } from './types.js';
+
+/**
+ * @arete-logger: outboundFilters
+ *
+ * @logs
+ * Outbound filter execution, changes applied, and filter error conditions.
+ *
+ * @impact
+ * Risk: Missing or noisy logs can obscure formatting decisions.
+ * Ethics: Logs touch message metadata and should avoid raw content leakage.
+ */
+const outboundFilterLogger = logger.child({ module: 'outboundFilters' });
+
+// Ordered pipeline so each filter sees the edits from the prior one.
+// `name` identifies the filter: runOutboundFilters prefixes every change entry
+// with it and reports it when the filter throws. `apply` is the filter itself.
+const outboundFilters: Array<{ name: string; apply: OutboundFilter }> = [
+    { name: 'normalize_links', apply: normalizeOutboundLinks },
+];
+
+/**
+ * Passes outbound message text through every registered filter, in order.
+ *
+ * Each filter receives the output of the one before it. A filter that throws is
+ * logged and skipped (fail open), so the text stays as it was before that
+ * filter ran and the remaining filters still execute.
+ *
+ * @param content - Message text about to be sent.
+ * @returns The filtered text plus change entries formatted as "<filter name>:<change>".
+ */
+export const runOutboundFilters = (content: string): OutboundFilterResult => {
+    const changeLog: string[] = []; // Accumulated, filter-prefixed change entries.
+    let current = content; // Text as it stands after the filters run so far.
+
+    for (const step of outboundFilters) {
+        try {
+            const result = step.apply(current);
+            current = result.content;
+
+            const { changes } = result;
+            if (changes.length === 0) {
+                continue;
+            }
+            changeLog.push(
+                ...changes.map((change) => `${step.name}:${change}`)
+            );
+        } catch (error) {
+            // Fail open: record what went wrong and move on to the next filter.
+            const reason = (error as Error)?.message ?? String(error);
+            outboundFilterLogger.error('Outbound filter failed; continuing', {
+                filter: step.name,
+                error: reason,
+            });
+        }
+    }
+
+    // Only the change summary is logged; raw message bodies stay out of the logs.
+    outboundFilterLogger.debug('Outbound filters evaluated', {
+        // TODO: Pseudonymize change summaries if they later include identifiers.
+        changes: changeLog,
+    });
+
+    return { content: current, changes: changeLog };
+};
+
+export type { OutboundFilter, OutboundFilterResult } from './types.js';
@@ -0,0 +1,214 @@
+/**
+ * @description: Normalizes outbound URLs into Markdown autolinks (<url>) without reflowing formatting.
+ * @arete-scope: interface
+ * @arete-module: NormalizeOutboundLinks
+ * @arete-risk: moderate - Linkification errors can distort meaning or intent.
+ * @arete-ethics: moderate - Formatting changes shape user interpretation and trust.
+ */
+
+// used only to run a Markdown parse so we can target edits by source offsets (no re-serialization/reflow).
+import { unified } from 'unified';
+
+// turns Markdown into an mdast AST with positional info, letting us identify “do not touch” spans (links/code/etc.).
+import remarkParse from 'remark-parse';
+
+// walks the AST so we can collect protected ranges and avoid editing inside Markdown constructs.
+import { visit } from 'unist-util-visit';
+
+// provides robust URL detection in plain text (punctuation/parentheses/etc.) without maintaining a regex.
+import LinkifyIt from 'linkify-it';
+
+// the AST type produced by remark-parse.
+import type { Root } from 'mdast';
+
+// the base type for nodes visited in the AST walker.
+import type { Node } from 'unist';
+
+// this filter’s return contract: { content, changes } for pipeline logging/telemetry.
+import type { OutboundFilterResult } from './types.js';
+
+// Represents a portion of the scanned text as a pair of string offsets.
+// Ranges are consumed with content.slice(start, end), so `start` is inclusive
+// and `end` is exclusive.
+interface TextRange {
+    // Offset of the first character in the range.
+    start: number;
+    // Offset just past the last character in the range.
+    end: number;
+}
+
+// Linkify is scoped to this module to keep behavior consistent and testable.
+// Constructed with no arguments, i.e. with linkify-it's default schemas and options.
+// NOTE(review): the defaults are understood to include fuzzy (scheme-less) link
+// and e-mail detection; confirm that is intended given the http(s)-only wording
+// in the normalizeOutboundLinks documentation.
+const linkify = new LinkifyIt();
+
+// Node types that should never be rewritten by the outbound normalizer.
+// The names are mdast node types; see the mdast specification:
+// https://github.com/syntax-tree/mdast
+// Membership is tested against node.type while walking the parsed tree.
+const PROTECTED_NODE_TYPES = new Set<string>([
+    'link',
+    'linkReference',
+    'definition',
+    'inlineCode',
+    'code',
+    'html',
+    'image',
+    'imageReference',
+]);
+
+/**
+ * Outbound filter that turns bare URLs into Markdown autolinks ("<url>").
+ * Note: Discord suppresses embeds for links formatted this way.
+ *
+ * Behavior:
+ * - Empty content, or content containing neither "http://" nor "https://",
+ *   is returned as-is without being parsed.
+ * - Otherwise the content is parsed as Markdown purely to learn the source
+ *   offsets of constructs that must stay untouched (existing links and images,
+ *   inline code and code blocks, raw HTML, reference definitions).
+ * - Text outside those spans is scanned with linkify-it and each detected URL
+ *   is wrapped in "<" and ">". No other characters are added and the text is
+ *   never re-serialized, so line breaks and formatting are preserved.
+ *
+ * @param content - Message text to normalize.
+ * @returns The normalized text and, when at least one URL was wrapped, a single
+ *          "wrapped_urls:<count>" change entry.
+ */
+export const normalizeOutboundLinks = (
+    content: string
+): OutboundFilterResult => {
+    // Fast path: nothing to do for empty input or input without an http(s) URL.
+    if (
+        !content ||
+        !(content.includes('http://') || content.includes('https://'))
+    ) {
+        return { content, changes: [] };
+    }
+
+    // The parse is only used to locate protected spans by source offset.
+    const markdownParser = unified().use(remarkParse);
+    const tree = markdownParser.parse(content) as Root;
+    const untouchable = collectProtectedRanges(tree, content.length);
+
+    const wrapped = linkifyWithProtectedRanges(content, untouchable);
+
+    // One compact summary entry instead of per-link detail.
+    const changes: string[] = [];
+    if (wrapped.count > 0) {
+        changes.push(`wrapped_urls:${wrapped.count}`);
+    }
+
+    return { content: wrapped.text, changes };
+};
+
+/**
+ * Gathers the source spans of every node whose type is in PROTECTED_NODE_TYPES.
+ *
+ * @param tree - Parsed mdast root.
+ * @param maxLength - Upper bound applied to offsets; the caller passes the content length.
+ * @returns Protected spans clamped to [0, maxLength], then sorted and merged.
+ */
+const collectProtectedRanges = (tree: Root, maxLength: number): TextRange[] => {
+    // Pin an offset inside [0, maxLength].
+    const clamp = (offset: number): number =>
+        Math.max(0, Math.min(offset, maxLength));
+
+    const found: TextRange[] = [];
+
+    visit(tree, (node: Node) => {
+        if (!PROTECTED_NODE_TYPES.has(node.type)) {
+            return;
+        }
+
+        const { position } = node;
+        const from = position?.start?.offset;
+        const to = position?.end?.offset;
+
+        // Nodes without numeric offsets cannot be mapped back to the source text.
+        if (typeof from === 'number' && typeof to === 'number') {
+            const span: TextRange = { start: clamp(from), end: clamp(to) };
+            if (span.end <= span.start) {
+                // Empty or inverted after clamping; nothing to protect.
+                return;
+            }
+            found.push(span);
+        }
+    });
+
+    return mergeRanges(found);
+};
+
+/**
+ * Sorts ranges by start (then end) and fuses any that overlap or touch.
+ * The input array and its elements are left unmodified.
+ *
+ * @param ranges - Ranges in any order, possibly overlapping.
+ * @returns A new ascending list of non-overlapping ranges.
+ */
+const mergeRanges = (ranges: TextRange[]): TextRange[] => {
+    const ordered = ranges
+        .slice()
+        .sort((a, b) => (a.start === b.start ? a.end - b.end : a.start - b.start));
+
+    const merged: TextRange[] = [];
+
+    for (const candidate of ordered) {
+        const tail = merged[merged.length - 1];
+
+        if (tail !== undefined && candidate.start <= tail.end) {
+            // Overlaps or touches the previous range: extend it in place.
+            tail.end = Math.max(tail.end, candidate.end);
+        } else {
+            // Copy so the caller's objects are never mutated by a later extension.
+            merged.push({ ...candidate });
+        }
+    }
+
+    return merged;
+};
+
+/**
+ * Linkifies only the parts of `content` that fall outside the given ranges.
+ * Protected spans are copied through verbatim.
+ *
+ * @param content - Full message text.
+ * @param ranges - Protected spans, expected sorted ascending and non-overlapping.
+ * @returns The reassembled text and the total number of URLs wrapped.
+ */
+const linkifyWithProtectedRanges = (
+    content: string,
+    ranges: TextRange[]
+): { text: string; count: number } => {
+    // Nothing is protected: treat the whole message as one segment.
+    if (ranges.length === 0) {
+        return linkifySegment(content);
+    }
+
+    const pieces: string[] = [];
+    let wrappedTotal = 0;
+    let position = 0;
+
+    // Linkify an unprotected slice and record its output.
+    const appendLinkified = (plain: string): void => {
+        const converted = linkifySegment(plain);
+        pieces.push(converted.text);
+        wrappedTotal += converted.count;
+    };
+
+    for (const { start, end } of ranges) {
+        if (start > position) {
+            appendLinkified(content.slice(position, start));
+        }
+
+        // Protected span: emitted exactly as written.
+        pieces.push(content.slice(start, end));
+        position = end;
+    }
+
+    // Trailing text after the last protected span.
+    if (position < content.length) {
+        appendLinkified(content.slice(position));
+    }
+
+    return { text: pieces.join(''), count: wrappedTotal };
+};
+
+/**
+ * Wraps every http(s) URL found in a plain-text segment in "<" and ">".
+ *
+ * linkify-it performs the detection, but with its default configuration it can
+ * also report matches that are not http(s) URLs (for example scheme-less
+ * domains such as "example.com", e-mail addresses, or other schemas like
+ * "mailto:" and "ftp:"). Wrapping those would put angle brackets around text
+ * that is not an http(s) link, so such matches are skipped and copied through
+ * unchanged.
+ *
+ * @param segment - Text that lies outside all protected Markdown ranges.
+ * @returns The rewritten text and the number of URLs that were wrapped.
+ */
+const linkifySegment = (segment: string): { text: string; count: number } => {
+    const matches = linkify.match(segment);
+    if (!matches || matches.length === 0) {
+        return { text: segment, count: 0 };
+    }
+
+    // Case-insensitive because URL schemes are case-insensitive.
+    const httpScheme = /^https?:\/\//i;
+
+    let result = '';
+    let cursor = 0;
+    let count = 0;
+
+    for (const match of matches) {
+        const start = match.index ?? 0;
+        const end = match.lastIndex ?? start;
+
+        // Take the URL straight from the source text so the brackets are the
+        // only characters this filter ever adds.
+        const raw = segment.slice(start, end);
+        if (!httpScheme.test(raw)) {
+            // Not an http(s) URL: leave the cursor where it is so a later
+            // slice copies this text through verbatim.
+            continue;
+        }
+
+        result += segment.slice(cursor, start);
+        result += `<${raw}>`;
+        count += 1;
+        cursor = end;
+    }
+
+    result += segment.slice(cursor);
+    return { text: result, count };
+};
@@ -0,0 +1,15 @@
+/**
+ * @description: Shared types for outbound message filters and pipeline composition.
+ * @arete-scope: interface
+ * @arete-module: OutboundFilterTypes
+ * @arete-risk: low - Typing mismatches could hide filter output errors.
+ * @arete-ethics: low - Type safety affects developer clarity more than user impact.
+ */
+
+/**
+ * What an outbound filter, and the outbound pipeline as a whole, returns.
+ */
+export interface OutboundFilterResult {
+    // Message text after the filter has run.
+    content: string;
+    // Short descriptions of the edits that were applied; empty when nothing changed.
+    changes: string[];
+}
+
+// Outbound filters operate on plain text and describe their edits for logging.
+// Each filter takes the current message text and returns the (possibly edited)
+// text together with its change descriptions.
+export type OutboundFilter = (content: string) => OutboundFilterResult;