fix(telegram): keep .co domains linked and wrap punctuated file refs

fix(telegram): clamp depth counters and add anchor tracking to orphaned pass
- Clamp depth counters at 0 for malformed HTML with stray closing tags - Add anchor depth tracking to orphaned TLD pass to prevent wrapping inside link text (e.g., <a href="...">R&D.md</a>) - 57 tests covering all edge cases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-14 00:26:57 +01:00 · 2026-02-05 13:11:18 +03:00 · 2026-02-05 12:55:06 +03:00 · 2026-02-05 12:41:08 +03:00 · 2026-02-05 12:23:24 +03:00 · 2026-02-05 12:03:43 +03:00
3 changed files with 615 additions and 5 deletions
--- a/src/telegram/bot/delivery.ts
+++ b/src/telegram/bot/delivery.ts
@@ -18,6 +18,7 @@ import {
  markdownToTelegramChunks,
  markdownToTelegramHtml,
  renderTelegramHtmlText,
+  wrapFileReferencesInHtml,
 } from "../format.js";
 import { buildInlineKeyboard } from "../send.js";
 import { cacheSticker, getCachedSticker } from "../sticker-cache.js";
@@ -76,7 +77,9 @@ export async function deliverReplies(params: {
      const nested = markdownToTelegramChunks(chunk, textLimit, { tableMode: params.tableMode });
      if (!nested.length && chunk) {
        chunks.push({
-          html: markdownToTelegramHtml(chunk, { tableMode: params.tableMode }),
+          html: wrapFileReferencesInHtml(
+            markdownToTelegramHtml(chunk, { tableMode: params.tableMode, wrapFileRefs: false }),
+          ),
          text: chunk,
        });
        continue;
--- a/src/telegram/format.ts
+++ b/src/telegram/format.ts
@@ -20,7 +20,56 @@ function escapeHtmlAttr(text: string): string {
  return escapeHtml(text).replace(/"/g, "&quot;");
 }

-function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
+/**
+ * Two-letter file extensions that are also country-code TLDs.
+ *
+ * A bare filename such as `README.md` looks like a domain name, so it gets
+ * auto-linked and Telegram renders a spurious domain registrar preview.
+ * References ending in one of these extensions are wrapped in <code> instead.
+ *
+ * An extension belongs here only when it is:
+ * 1. Commonly used as a file extension in code/docs, and
+ * 2. Rarely used as an intentional domain reference.
+ *
+ * Deliberately excluded: .ai, .io, .tv, .fm and .co, which are popular domain
+ * TLDs (x.ai, vercel.io, github.io, t.co) and must stay clickable links.
+ *
+ * Entries are stored lowercase; callers lowercase the extension before lookup.
+ */
+const FILE_EXTENSIONS_WITH_TLD: Set<string> = new Set([
+  // Very common in repositories
+  "md", // Markdown (Moldova)
+  "go", // Go language (common in Go projects)
+  "py", // Python (Paraguay)
+  "pl", // Perl (Poland)
+  "sh", // Shell scripts (Saint Helena)
+  // Less common, but still rarely meant as domains
+  "am", // Automake files (Armenia)
+  "at", // Assembly (Austria)
+  "be", // Backend files (Belgium)
+  "cc", // C++ source (Cocos Islands)
+]);
+
+/**
+ * Detects a link that markdown-it linkify generated from a bare filename
+ * (e.g. README.md → http://README.md).
+ *
+ * @param href - Link target, with or without an http(s):// scheme.
+ * @param label - Visible link text.
+ * @returns true only when all of the following hold:
+ *   - href minus its http(s):// scheme is exactly the label,
+ *   - the label has a dot that is not its first character,
+ *   - the text after the last dot is a tracked extension (case-insensitive),
+ *   - no path segment before the final one contains a dot (a dotted leading
+ *     segment looks like a domain, e.g. example.com/README.md).
+ */
+function isAutoLinkedFileRef(href: string, label: string): boolean {
+  // An explicit link has a label that differs from its target; leave those alone.
+  const target = href.replace(/^https?:\/\//i, "");
+  const lastDot = label.lastIndexOf(".");
+  if (target !== label || lastDot < 1) {
+    return false;
+  }
+  const extension = label.slice(lastDot + 1).toLowerCase();
+  if (!FILE_EXTENSIONS_WITH_TLD.has(extension)) {
+    return false;
+  }
+  // Every segment except the filename itself must be dot-free.
+  const directories = label.split("/").slice(0, -1);
+  return !directories.some((segment) => segment.includes("."));
+}
+
+function buildTelegramLink(link: MarkdownLinkSpan, text: string) {
  const href = link.href.trim();
  if (!href) {
    return null;
@@ -28,6 +77,11 @@ function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
  if (link.start === link.end) {
    return null;
  }
+  // Suppress auto-linkified file references (e.g. README.md → http://README.md)
+  const label = text.slice(link.start, link.end);
+  if (isAutoLinkedFileRef(href, label)) {
+    return null;
+  }
  const safeHref = escapeHtmlAttr(href);
  return {
    start: link.start,
@@ -53,7 +107,7 @@ function renderTelegramHtml(ir: MarkdownIR): string {

 export function markdownToTelegramHtml(
  markdown: string,
-  options: { tableMode?: MarkdownTableMode } = {},
+  options: { tableMode?: MarkdownTableMode; wrapFileRefs?: boolean } = {},
 ): string {
  const ir = markdownToIR(markdown ?? "", {
    linkify: true,
@@ -61,7 +115,154 @@ export function markdownToTelegramHtml(
    blockquotePrefix: "",
    tableMode: options.tableMode,
  });
-  return renderTelegramHtml(ir);
+  const html = renderTelegramHtml(ir);
+  // Wrap file references unless the caller opted out with wrapFileRefs: false
+  if (options.wrapFileRefs !== false) {
+    return wrapFileReferencesInHtml(html);
+  }
+  return html;
+}
+
+/** Escape regex metacharacters in a string */
+function escapeRegex(str: string): string {
+  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+/** One run of HTML produced by splitByProtectedTags. */
+type FileRefHtmlPiece = {
+  /** Raw HTML of this run, copied verbatim from the scanned string. */
+  text: string;
+  /** Offset of the run within the scanned string. */
+  start: number;
+  /**
+   * "tag": a <code>/<pre>/<a> opening or closing tag itself;
+   * "protected": content nested inside at least one of those tags;
+   * "plain": everything else (may still contain other tags such as <b>).
+   */
+  kind: "tag" | "protected" | "plain";
+};
+
+/**
+ * Splits HTML into <code>/<pre>/<a> tags and the text runs between them,
+ * labelling each text run as protected (nested inside one of those tags) or plain.
+ * Depth counters are clamped at 0 so a stray closing tag in malformed HTML
+ * cannot cancel out a later, genuine opening tag.
+ */
+function splitByProtectedTags(html: string): FileRefHtmlPiece[] {
+  const pieces: FileRefHtmlPiece[] = [];
+  const depth = { code: 0, pre: 0, a: 0 };
+  const tagPattern = /(<\/?)(code|pre|a)\b[^>]*?>/gi;
+  let cursor = 0;
+
+  // Emits the text between the previous tag and `end`, classified by the current depth.
+  const pushText = (end: number): void => {
+    if (end <= cursor) {
+      return;
+    }
+    const isProtected = depth.code > 0 || depth.pre > 0 || depth.a > 0;
+    pieces.push({
+      text: html.slice(cursor, end),
+      start: cursor,
+      kind: isProtected ? "protected" : "plain",
+    });
+  };
+
+  let match: RegExpExecArray | null;
+  while ((match = tagPattern.exec(html)) !== null) {
+    // Text before the tag is classified with the depth in effect BEFORE the tag.
+    pushText(match.index);
+    const tagName = (match[2] ?? "").toLowerCase();
+    if (tagName === "code" || tagName === "pre" || tagName === "a") {
+      const isClosing = match[1] === "</";
+      depth[tagName] = isClosing ? Math.max(0, depth[tagName] - 1) : depth[tagName] + 1;
+    }
+    pieces.push({ text: match[0], start: match.index, kind: "tag" });
+    cursor = tagPattern.lastIndex;
+  }
+  pushText(html.length);
+  return pieces;
+}
+
+/** True when `index` lies after a "<" that has not been closed by ">" yet (tag name or attribute). */
+function isInsideTagMarkup(text: string, index: number): boolean {
+  return text.lastIndexOf("<", index) > text.lastIndexOf(">", index);
+}
+
+/**
+ * Wraps standalone file references (with TLD extensions) in <code> tags.
+ * This prevents Telegram from treating them as URLs and generating
+ * irrelevant domain registrar previews.
+ *
+ * Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes.
+ * Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
+ *
+ * Steps:
+ * 1. De-linkify anchors whose text equals their http(s) target and looks like
+ *    a file reference (e.g. <a href="http://README.md">README.md</a>).
+ * 2. Wrap filename/path references found in unprotected text.
+ * 3. Wrap leftover single-letter names (e.g. "D.md" in "D.md-draft") that the
+ *    main pattern rejects because of the character following the extension.
+ *
+ * @param html - HTML to post-process.
+ * @returns The HTML with file references wrapped in <code>; all other markup is preserved.
+ */
+export function wrapFileReferencesInHtml(html: string): string {
+  // Build regex alternation for all tracked extensions (escape metacharacters for safety)
+  const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).map(escapeRegex).join("|");
+
+  // Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
+  const autoLinkedAnchor = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;
+  const delinkified = html.replace(autoLinkedAnchor, (anchor: string, label: string) => {
+    // A label carrying raw angle brackets is not a plain filename; keep the anchor untouched.
+    if (/[<>]/.test(label) || !isAutoLinkedFileRef(`http://${label}`, label)) {
+      return anchor;
+    }
+    // The label was already the anchor's HTML content, so it is emitted verbatim.
+    // NOTE(review): escaping it again would presumably turn an existing "&amp;"
+    // into "&amp;amp;" (assumes escapeHtml escapes "&" - confirm).
+    return `<code>${label}</code>`;
+  });
+
+  const filePattern = new RegExp(
+    `(^|[^a-zA-Z0-9_\\-/])([a-zA-Z0-9_.\\-/]+\\.(?:${extensionsPattern}))(?=$|[^a-zA-Z0-9_\\-/])`,
+    "gi",
+  );
+
+  // First pass: wrap filename/path references inside one unprotected text run.
+  const wrapPlainText = (segment: string): string =>
+    segment.replace(
+      filePattern,
+      (m: string, prefix: string, filename: string, offset: number) => {
+        // Skip if we're inside any HTML tag (e.g., attributes on tags other than code/pre/a)
+        if (isInsideTagMarkup(segment, offset + prefix.length)) {
+          return m;
+        }
+        // "//host/path" is the tail of a URL such as http://example.com/README.md
+        if (filename.startsWith("//")) {
+          return m;
+        }
+        return `${prefix}<code>${escapeHtml(filename)}</code>`;
+      },
+    );
+
+  const firstPass = splitByProtectedTags(delinkified)
+    .map((piece) => (piece.kind === "plain" ? wrapPlainText(piece.text) : piece.text))
+    .join("");
+
+  // Second pass: catch orphaned single-letter TLD patterns the first pass left behind.
+  // These can be auto-linked by Telegram as domains
+  const orphanedTldPattern = new RegExp(
+    `([^a-zA-Z0-9]|^)([A-Za-z]\\.(?:${extensionsPattern}))(?=[^a-zA-Z0-9/]|$)`,
+    "g",
+  );
+  // Protected runs of the first-pass output (including the <code> it inserted),
+  // computed once with the same clamped tag tracking as the first pass.
+  const protectedRuns = splitByProtectedTags(firstPass).filter(
+    (piece) => piece.kind === "protected",
+  );
+  // replace() reports matches in ascending offset order, so one forward-moving
+  // cursor over the sorted runs answers every lookup.
+  let runCursor = 0;
+  const isProtectedOffset = (offset: number): boolean => {
+    for (;;) {
+      const run = protectedRuns[runCursor];
+      if (run === undefined) {
+        return false;
+      }
+      if (offset < run.start + run.text.length) {
+        return offset >= run.start;
+      }
+      runCursor += 1;
+    }
+  };
+
+  return firstPass.replace(
+    orphanedTldPattern,
+    (m: string, prefix: string, tld: string, offset: number) => {
+      // Skip if prefix is > (right after a tag close)
+      if (prefix === ">") {
+        return m;
+      }
+      // Skip if we're inside an HTML tag (between < and >), i.e. an attribute value
+      if (isInsideTagMarkup(firstPass, offset)) {
+        return m;
+      }
+      // Skip if inside code/pre/anchor content
+      if (isProtectedOffset(offset)) {
+        return m;
+      }
+      return `${prefix}<code>${escapeHtml(tld)}</code>`;
+    },
+  );
 }

 export function renderTelegramHtmlText(
@@ -70,8 +271,10 @@ export function renderTelegramHtmlText(
 ): string {
  const textMode = options.textMode ?? "markdown";
  if (textMode === "html") {
+    // For HTML mode, trust caller markup - don't modify
    return text;
  }
+  // markdownToTelegramHtml already wraps file references by default
  return markdownToTelegramHtml(text, { tableMode: options.tableMode });
 }

@@ -88,7 +291,7 @@ export function markdownToTelegramChunks(
  });
  const chunks = chunkMarkdownIR(ir, limit);
  return chunks.map((chunk) => ({
-    html: renderTelegramHtml(chunk),
+    html: wrapFileReferencesInHtml(renderTelegramHtml(chunk)),
    text: chunk.text,
  }));
 }
--- a/src/telegram/format.wrap-md.test.ts
+++ b/src/telegram/format.wrap-md.test.ts
@@ -0,0 +1,404 @@
+import { describe, expect, it } from "vitest";
+import {
+  markdownToTelegramChunks,
+  markdownToTelegramHtml,
+  renderTelegramHtmlText,
+  wrapFileReferencesInHtml,
+} from "./format.js";
+
+describe("wrapFileReferencesInHtml", () => {
+  // --- Plain text: each tracked extension ends up inside <code> ---
+
+  it("wraps .md filenames in code tags", () => {
+    const readme = wrapFileReferencesInHtml("Check README.md");
+    const heartbeat = wrapFileReferencesInHtml("See HEARTBEAT.md for status");
+    expect(readme).toContain("Check <code>README.md</code>");
+    expect(heartbeat).toContain("See <code>HEARTBEAT.md</code> for status");
+  });
+
+  it("wraps .go filenames", () => {
+    const wrapped = wrapFileReferencesInHtml("Check main.go");
+    expect(wrapped).toContain("Check <code>main.go</code>");
+  });
+
+  it("wraps .py filenames", () => {
+    const wrapped = wrapFileReferencesInHtml("Run script.py");
+    expect(wrapped).toContain("Run <code>script.py</code>");
+  });
+
+  it("wraps .pl filenames", () => {
+    const wrapped = wrapFileReferencesInHtml("Check backup.pl");
+    expect(wrapped).toContain("Check <code>backup.pl</code>");
+  });
+
+  it("wraps .sh filenames", () => {
+    const wrapped = wrapFileReferencesInHtml("Run backup.sh");
+    expect(wrapped).toContain("Run <code>backup.sh</code>");
+  });
+
+  it("wraps file paths", () => {
+    const wrapped = wrapFileReferencesInHtml("Look at squad/friday/HEARTBEAT.md");
+    expect(wrapped).toContain("Look at <code>squad/friday/HEARTBEAT.md</code>");
+  });
+
+  // --- Protected markup: content of <code>, <pre> and <a> is left alone ---
+
+  it("does not wrap inside existing code tags", () => {
+    const html = "Already <code>wrapped.md</code> here";
+    const wrapped = wrapFileReferencesInHtml(html);
+    expect(wrapped).toBe(html);
+    expect(wrapped).not.toContain("<code><code>");
+  });
+
+  it("does not wrap inside pre tags", () => {
+    const html = "<pre><code>README.md</code></pre>";
+    expect(wrapFileReferencesInHtml(html)).toBe(html);
+  });
+
+  it("does not wrap inside anchor tags", () => {
+    const html = '<a href="README.md">Link</a>';
+    expect(wrapFileReferencesInHtml(html)).toBe(html);
+  });
+
+  it("does not wrap file refs inside real URL anchor tags", () => {
+    const html = 'Visit <a href="https://example.com/README.md">example.com/README.md</a>';
+    expect(wrapFileReferencesInHtml(html)).toBe(html);
+  });
+
+  // --- Several references, boundaries and punctuation ---
+
+  it("handles mixed content correctly", () => {
+    const wrapped = wrapFileReferencesInHtml("Check README.md and CONTRIBUTING.md");
+    expect(wrapped).toContain("<code>README.md</code>");
+    expect(wrapped).toContain("<code>CONTRIBUTING.md</code>");
+  });
+
+  it("handles edge cases", () => {
+    const withoutFiles = wrapFileReferencesInHtml("No markdown files here");
+    const atStart = wrapFileReferencesInHtml("File.md at start");
+    const atEnd = wrapFileReferencesInHtml("Ends with file.md");
+    expect(withoutFiles).not.toContain("<code>");
+    expect(atStart).toContain("<code>File.md</code>");
+    expect(atEnd).toContain("<code>file.md</code>");
+  });
+
+  it("wraps file refs with punctuation boundaries", () => {
+    const beforePeriod = wrapFileReferencesInHtml("See README.md.");
+    const beforeComma = wrapFileReferencesInHtml("See README.md,");
+    const inParens = wrapFileReferencesInHtml("(README.md)");
+    const beforeColon = wrapFileReferencesInHtml("README.md:");
+    expect(beforePeriod).toContain("<code>README.md</code>.");
+    expect(beforeComma).toContain("<code>README.md</code>,");
+    expect(inParens).toContain("(<code>README.md</code>)");
+    expect(beforeColon).toContain("<code>README.md</code>:");
+  });
+
+  // --- Auto-linkified anchors: href "http://<label>" with identical text ---
+
+  it("de-linkifies auto-linkified file ref anchors", () => {
+    const html = '<a href="http://README.md">README.md</a>';
+    const wrapped = wrapFileReferencesInHtml(html);
+    expect(wrapped).toBe("<code>README.md</code>");
+  });
+
+  it("de-linkifies auto-linkified path anchors", () => {
+    const html = '<a href="http://squad/friday/HEARTBEAT.md">squad/friday/HEARTBEAT.md</a>';
+    const wrapped = wrapFileReferencesInHtml(html);
+    expect(wrapped).toBe("<code>squad/friday/HEARTBEAT.md</code>");
+  });
+
+  it("preserves explicit links where label differs from href", () => {
+    const html = '<a href="http://README.md">click here</a>';
+    const wrapped = wrapFileReferencesInHtml(html);
+    expect(wrapped).toBe(html);
+  });
+
+  it("wraps file ref after closing anchor tag", () => {
+    const html = '<a href="https://example.com">link</a> then README.md';
+    const wrapped = wrapFileReferencesInHtml(html);
+    expect(wrapped).toContain("</a> then <code>README.md</code>");
+  });
+});
+
+describe("renderTelegramHtmlText - file reference wrapping", () => {
+  it("wraps file references in markdown mode", () => {
+    const html = renderTelegramHtmlText("Check README.md");
+    expect(html).toContain("<code>README.md</code>");
+  });
+
+  it("does not wrap in HTML mode (trusts caller markup)", () => {
+    // In textMode "html" the caller owns the markup, so the text must come back untouched.
+    const html = renderTelegramHtmlText("Check README.md", { textMode: "html" });
+    expect(html).toBe("Check README.md");
+    expect(html).not.toContain("<code>");
+  });
+
+  it("does not double-wrap already code-formatted content", () => {
+    const html = renderTelegramHtmlText("Already `wrapped.md` here");
+    // Exactly one level of code tags: present, but never nested.
+    expect(html).toContain("<code>");
+    expect(html).not.toContain("<code><code>");
+  });
+});
+
+describe("markdownToTelegramHtml - file reference wrapping", () => {
+  it("wraps file references by default", () => {
+    const html = markdownToTelegramHtml("Check README.md");
+    expect(html).toContain("<code>README.md</code>");
+  });
+
+  it("can skip wrapping when requested", () => {
+    const html = markdownToTelegramHtml("Check README.md", { wrapFileRefs: false });
+    expect(html).not.toContain("<code>README.md</code>");
+  });
+
+  it("wraps multiple file types in a single message", () => {
+    const html = markdownToTelegramHtml("Edit main.go and script.py");
+    expect(html).toContain("<code>main.go</code>");
+    expect(html).toContain("<code>script.py</code>");
+  });
+
+  it("preserves real URLs as anchor tags", () => {
+    const html = markdownToTelegramHtml("Visit https://example.com");
+    expect(html).toContain('<a href="https://example.com">');
+  });
+
+  it("preserves explicit markdown links even when href looks like a file ref", () => {
+    // The label ("docs") differs from the target, so this is a deliberate link.
+    const html = markdownToTelegramHtml("[docs](http://README.md)");
+    expect(html).toContain('<a href="http://README.md">docs</a>');
+  });
+
+  it("wraps file ref after real URL in same message", () => {
+    const html = markdownToTelegramHtml("Visit https://example.com and README.md");
+    expect(html).toContain('<a href="https://example.com">');
+    expect(html).toContain("<code>README.md</code>");
+  });
+});
+
+describe("markdownToTelegramChunks - file reference wrapping", () => {
+  it("wraps file references in chunked output", () => {
+    // 4096 is far above the message length, so the first chunk holds both references.
+    const chunks = markdownToTelegramChunks("Check README.md and backup.sh", 4096);
+    expect(chunks.length).toBeGreaterThan(0);
+    const firstHtml = chunks[0].html;
+    expect(firstHtml).toContain("<code>README.md</code>");
+    expect(firstHtml).toContain("<code>backup.sh</code>");
+  });
+});
+
+// Regression suite for file-reference wrapping: inline formatting, real links vs.
+// file refs, extension allow-list, protected tags, malformed HTML and attributes.
+describe("edge cases", () => {
+  it("wraps file ref inside bold tags", () => {
+    const result = markdownToTelegramHtml("**README.md**");
+    expect(result).toBe("<b><code>README.md</code></b>");
+  });
+
+  it("wraps file ref inside italic tags", () => {
+    const result = markdownToTelegramHtml("*script.py*");
+    expect(result).toBe("<i><code>script.py</code></i>");
+  });
+
+  it("does not wrap inside fenced code blocks", () => {
+    const result = markdownToTelegramHtml("```\nREADME.md\n```");
+    expect(result).toBe("<pre><code>README.md\n</code></pre>");
+    expect(result).not.toContain("<code><code>");
+  });
+
+  it("preserves domain-like paths as anchor tags", () => {
+    const result = markdownToTelegramHtml("example.com/README.md");
+    expect(result).toContain('<a href="http://example.com/README.md">');
+    expect(result).not.toContain("<code>");
+  });
+
+  it("preserves github URLs with file paths", () => {
+    const result = markdownToTelegramHtml("https://github.com/foo/README.md");
+    expect(result).toContain('<a href="https://github.com/foo/README.md">');
+  });
+
+  it("handles wrapFileRefs: false (plain text output)", () => {
+    const result = markdownToTelegramHtml("README.md", { wrapFileRefs: false });
+    // buildTelegramLink returns null, so no <a> tag; wrapFileRefs: false skips <code>
+    expect(result).toBe("README.md");
+  });
+
+  it("wraps supported TLD extensions (.am, .at, .be, .cc)", () => {
+    const result = markdownToTelegramHtml("Makefile.am and code.at and app.be and main.cc");
+    expect(result).toContain("<code>Makefile.am</code>");
+    expect(result).toContain("<code>code.at</code>");
+    expect(result).toContain("<code>app.be</code>");
+    expect(result).toContain("<code>main.cc</code>");
+  });
+
+  it("does not wrap popular domain TLDs (.ai, .io, .tv, .fm)", () => {
+    // These are commonly used as real domains (x.ai, vercel.io, github.io)
+    const result = markdownToTelegramHtml("Check x.ai and vercel.io and app.tv and radio.fm");
+    // Should be links, not code
+    expect(result).toContain('<a href="http://x.ai">');
+    expect(result).toContain('<a href="http://vercel.io">');
+    expect(result).toContain('<a href="http://app.tv">');
+    expect(result).toContain('<a href="http://radio.fm">');
+  });
+
+  it("keeps .co domains as links", () => {
+    // .co is not a tracked extension, so both short and long .co names stay clickable
+    const result = markdownToTelegramHtml("Visit t.co and openclaw.co");
+    expect(result).toContain('<a href="http://t.co">');
+    expect(result).toContain('<a href="http://openclaw.co">');
+    expect(result).not.toContain("<code>t.co</code>");
+    expect(result).not.toContain("<code>openclaw.co</code>");
+  });
+
+  it("does not wrap non-TLD extensions", () => {
+    const result = markdownToTelegramHtml("image.png and style.css and script.js");
+    expect(result).not.toContain("<code>image.png</code>");
+    expect(result).not.toContain("<code>style.css</code>");
+    expect(result).not.toContain("<code>script.js</code>");
+  });
+
+  it("handles file ref at start of message", () => {
+    const result = markdownToTelegramHtml("README.md is important");
+    expect(result).toBe("<code>README.md</code> is important");
+  });
+
+  it("handles file ref at end of message", () => {
+    const result = markdownToTelegramHtml("Check the README.md");
+    expect(result).toBe("Check the <code>README.md</code>");
+  });
+
+  it("handles multiple file refs in sequence", () => {
+    const result = markdownToTelegramHtml("README.md CHANGELOG.md LICENSE.md");
+    expect(result).toContain("<code>README.md</code>");
+    expect(result).toContain("<code>CHANGELOG.md</code>");
+    expect(result).toContain("<code>LICENSE.md</code>");
+  });
+
+  it("handles nested path without domain-like segments", () => {
+    const result = markdownToTelegramHtml("src/utils/helpers/format.go");
+    expect(result).toContain("<code>src/utils/helpers/format.go</code>");
+  });
+
+  it("wraps path with version-like segment (not a domain)", () => {
+    // v1.0/README.md is not linkified by markdown-it (no TLD), so it's wrapped
+    const result = markdownToTelegramHtml("v1.0/README.md");
+    expect(result).toContain("<code>v1.0/README.md</code>");
+  });
+
+  it("preserves domain path with version segment", () => {
+    // example.com/v1.0/README.md IS linkified (has domain), preserved as link
+    const result = markdownToTelegramHtml("example.com/v1.0/README.md");
+    expect(result).toContain('<a href="http://example.com/v1.0/README.md">');
+  });
+
+  it("handles file ref with hyphen and underscore in name", () => {
+    const result = markdownToTelegramHtml("my-file_name.md");
+    expect(result).toContain("<code>my-file_name.md</code>");
+  });
+
+  it("handles uppercase extensions", () => {
+    const result = markdownToTelegramHtml("README.MD and SCRIPT.PY");
+    expect(result).toContain("<code>README.MD</code>");
+    expect(result).toContain("<code>SCRIPT.PY</code>");
+  });
+
+  it("handles nested code tags (depth tracking)", () => {
+    // Nested <code> inside <pre> - should not wrap inner content
+    const input = "<pre><code>README.md</code></pre> then script.py";
+    const result = wrapFileReferencesInHtml(input);
+    expect(result).toBe("<pre><code>README.md</code></pre> then <code>script.py</code>");
+  });
+
+  it("handles multiple anchor tags in sequence", () => {
+    const input =
+      '<a href="https://a.com">link1</a> README.md <a href="https://b.com">link2</a> script.py';
+    const result = wrapFileReferencesInHtml(input);
+    expect(result).toContain("</a> <code>README.md</code> <a");
+    expect(result).toContain("</a> <code>script.py</code>");
+  });
+
+  it("handles auto-linked anchor with backreference match", () => {
+    // The regex uses \1 backreference - href must equal label
+    const input = '<a href="http://README.md">README.md</a>';
+    expect(wrapFileReferencesInHtml(input)).toBe("<code>README.md</code>");
+  });
+
+  it("preserves anchor when href and label differ (no backreference match)", () => {
+    // Different href and label - should NOT de-linkify
+    const input = '<a href="http://other.md">README.md</a>';
+    expect(wrapFileReferencesInHtml(input)).toBe(input);
+  });
+
+  it("wraps orphaned TLD pattern after special character", () => {
+    // & is not a filename character, so R&D.md cannot be wrapped as a whole;
+    // the trailing D.md is still wrapped so Telegram cannot link it as a domain
+    const input = "R&D.md";
+    const result = wrapFileReferencesInHtml(input);
+    expect(result).toBe("R&<code>D.md</code>");
+  });
+
+  it("wraps orphaned single-letter TLD patterns", () => {
+    // Single-letter names with tracked extensions (md, sh)
+    const result1 = wrapFileReferencesInHtml("X.md is cool");
+    expect(result1).toContain("<code>X.md</code>");
+
+    const result2 = wrapFileReferencesInHtml("Check R.sh");
+    expect(result2).toContain("<code>R.sh</code>");
+  });
+
+  it("does not match filenames containing angle brackets", () => {
+    // The filename character class only allows letters, digits, "_", ".", "-" and "/",
+    // so a name containing < or > is never matched and wrapped (which is correct/safe)
+    const input = "file<script>.md";
+    const result = wrapFileReferencesInHtml(input);
+    // Not wrapped because < breaks the filename pattern
+    expect(result).toBe(input);
+  });
+
+  it("wraps file ref before unrelated HTML tags", () => {
+    // x.md followed by an unrelated <b> element - wrap the file ref only
+    const input = "x.md <b>bold</b>";
+    const result = wrapFileReferencesInHtml(input);
+    expect(result).toBe("<code>x.md</code> <b>bold</b>");
+  });
+
+  it("does not wrap orphaned TLD inside existing code tags", () => {
+    // R&D.md is already inside <code>, orphaned pass should NOT wrap D.md again
+    const input = "<code>R&D.md</code>";
+    const result = wrapFileReferencesInHtml(input);
+    // Should remain unchanged - no nested code tags
+    expect(result).toBe(input);
+    expect(result).not.toContain("<code><code>");
+    expect(result).not.toContain("</code></code>");
+  });
+
+  it("does not wrap orphaned TLD inside anchor link text", () => {
+    // R&D.md inside anchor text should NOT have D.md wrapped
+    const input = '<a href="https://example.com">R&D.md</a>';
+    const result = wrapFileReferencesInHtml(input);
+    expect(result).toBe(input);
+    expect(result).not.toContain("<code>D.md</code>");
+  });
+
+  it("handles malformed HTML with stray closing tags (negative depth)", () => {
+    // Stray </code> before content shouldn't break protection logic
+    // (depth should clamp at 0, not go negative)
+    const input = "</code>README.md<code>inside</code> after.md";
+    const result = wrapFileReferencesInHtml(input);
+    // README.md should be wrapped (codeDepth = 0 after clamping stray close)
+    expect(result).toContain("<code>README.md</code>");
+    // after.md should be wrapped (codeDepth = 0 after proper close)
+    expect(result).toContain("<code>after.md</code>");
+    // Should not have nested code tags
+    expect(result).not.toContain("<code><code>");
+  });
+
+  it("does not wrap orphaned TLD inside href attributes", () => {
+    // D.md inside href should NOT be wrapped
+    const input = '<a href="http://example.com/R&D.md">link</a>';
+    const result = wrapFileReferencesInHtml(input);
+    // href should be untouched
+    expect(result).toBe(input);
+    expect(result).not.toContain("<code>D.md</code>");
+  });
+
+  it("does not wrap orphaned TLD inside any HTML attribute", () => {
+    // <img> is not one of the tracked tags, so this exercises the generic "< ... >" check
+    const input = '<img src="logo/R&D.md" alt="R&D.md">';
+    const result = wrapFileReferencesInHtml(input);
+    expect(result).toBe(input);
+  });
+
+  it("handles multiple orphaned TLDs with HTML tags (offset stability)", () => {
+    // Guards against checking match offsets (which refer to the pre-replacement
+    // string) against a string that has already been modified
+    const input = '<a href="http://A.md">link</a> B.md <span title="C.sh">text</span> D.py';
+    const result = wrapFileReferencesInHtml(input);
+    // A.md in href should NOT be wrapped (inside attribute)
+    // B.md outside tags SHOULD be wrapped
+    // C.sh in title attribute should NOT be wrapped
+    // D.py outside tags SHOULD be wrapped
+    expect(result).toContain("<code>B.md</code>");
+    expect(result).toContain("<code>D.py</code>");
+    expect(result).not.toContain("<code>A.md</code>");
+    expect(result).not.toContain("<code>C.sh</code>");
+    // Attributes should be unchanged
+    expect(result).toContain('href="http://A.md"');
+    expect(result).toContain('title="C.sh"');
+  });
+});
Author	SHA1	Message	Date
Peter Steinberger	49d7055d12	fix(telegram): keep .co domains linked and wrap punctuated file refs	2026-02-14 00:26:57 +01:00
divanoli	6bf2ced2f5	fix(telegram): clamp depth counters and add anchor tracking to orphaned pass - Clamp depth counters at 0 for malformed HTML with stray closing tags - Add anchor depth tracking to orphaned TLD pass to prevent wrapping inside link text (e.g., <a href="...">R&D.md</a>) - 57 tests covering all edge cases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-02-05 13:11:18 +03:00
divanoli	e32dcb566d	fix(telegram): prevent orphaned TLD wrapping inside code/pre tags - Add depth tracking for code/pre tags in orphaned TLD pass - Fix test to expect valid HTML output - 55 tests now covering nested tag scenarios Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-02-05 12:55:06 +03:00
divanoli	90974e1030	refactor(telegram): use snapshot for orphaned TLD offset clarity Use explicit snapshot variable when checking tag positions in orphaned TLD pass. While JavaScript's replace() doesn't mutate during iteration, this makes intent explicit and adds test coverage for multi-TLD HTML. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-02-05 12:41:08 +03:00
divanoli	352398b9a5	fix(telegram): prevent orphaned TLD wrapping inside HTML tags Code review fixes: 1. Orphaned TLD pass now checks if match is inside HTML tag - Uses lastIndexOf('<') vs lastIndexOf('>') to detect tag context - Skips wrapping when between < and > (inside attributes) - Prevents invalid HTML like <a href="...&<code>D.md</code>"> 2. textMode: 'html' now trusts caller markup - Returns text unchanged instead of wrapping - Caller owns HTML structure in this mode Tests added: - 'does not wrap orphaned TLD inside href attributes' - 'does not wrap orphaned TLD inside any HTML attribute' - 'does not wrap in HTML mode (trusts caller markup)'	2026-02-05 12:23:24 +03:00
divanoli	94851de4f8	refactor(telegram): remove popular domain TLDs from file extension list Remove .ai, .io, .tv, .fm from FILE_EXTENSIONS_WITH_TLD because: - These are commonly used as real domains (x.ai, vercel.io, github.io) - Rarely used as actual file extensions - Users are more likely referring to websites than files Keep: md, sh, py, go, pl (common file extensions, rarely intentional domains) Keep: am, at, be, cc, co (less common as intentional domain references) Update tests to reflect the change: - Add test for supported extensions (.am, .at, .be, .cc, .co) - Add test verifying popular TLDs stay as links	2026-02-05 12:03:43 +03:00
divanoli	38a94ec5fe	fix(telegram): catch orphaned single-letter TLD patterns When text like 'R&D.md' doesn't match the main file pattern (because & breaks the character class), the 'D.md' part can still be auto-linked by Telegram as a domain (https://d.md/). Add second pass to catch orphaned TLD patterns like 'D.md', 'R.io', 'X.ai' that follow non-alphanumeric characters and wrap them in <code> tags. Pattern: ([^a-zA-Z0-9]\|^)([A-Za-z]\.(?:extensions))(?=[^a-zA-Z0-9/]\|$) Tests added: - 'wraps orphaned TLD pattern after special character' (R&D.md → R&<code>D.md</code>) - 'wraps orphaned single-letter TLD patterns' (X.ai, R.io)	2026-02-05 11:54:09 +03:00
divanoli	5431591cfa	fix(telegram): add escapeHtml and escapeRegex for defense in depth Code review fixes: 1. Escape filename with escapeHtml() before inserting into <code> tags - Prevents HTML injection if regex ever matches unsafe chars - Defense in depth (current regex already limits to safe chars) 2. Escape extensions with escapeRegex() before joining into pattern - Prevents regex breakage if extensions contain metacharacters - Future-proofs against extensions like 'c++' or 'd.ts' Add tests documenting regex safety boundaries: - Filenames with special chars (&, <, >) don't match - Only [a-zA-Z0-9_.\-./] chars are captured	2026-02-05 11:47:50 +03:00
divanoli	8a5453e3e7	fix(telegram): use regex literal and depth counters for tag tracking Code review fixes: 1. Replace RegExp constructor with regex literal for autoLinkedAnchor - Avoids double-escaping issues with \s - Uses backreference \1 to match href=label pattern directly 2. Replace boolean toggles with depth counters for tag nesting - codeDepth, preDepth, anchorDepth track nesting levels - Correctly handles nested tags like <pre><code>...</code></pre> - Prevents wrapping inside any level of protected tags Add 4 tests for edge cases: - Nested code tags (depth tracking) - Multiple anchor tags in sequence - Auto-linked anchor with backreference match - Anchor with different href/label (no match)	2026-02-05 11:36:58 +03:00
divanoli	58c69ee8bd	test(telegram): add comprehensive edge case coverage for file ref wrapping Add 16 edge case tests covering: - File refs inside bold/italic tags - Fenced code blocks (no double-wrap) - Domain-like paths preserved as links (example.com/README.md) - GitHub URLs with file paths - wrapFileRefs: false behavior - All TLD extensions (.ai, .io, .tv, .fm) - Non-TLD extensions not wrapped (.png, .css, .js) - File ref position (start, end, multiple in sequence) - Nested paths without domain segments - Version-like paths (v1.0/README.md wraps, example.com/v1.0/README.md links) - Hyphens and underscores in filenames - Uppercase extensions	2026-02-05 11:07:31 +03:00
divanoli	99311daaed	fix(telegram): prevent URL previews for file refs with TLD extensions Two layers were causing spurious link previews for file references like `README.md`, `backup.sh`, `main.go`: 1. markdown-it linkify converts `README.md` to `<a href="http://README.md">README.md</a>` (.md = Moldova TLD) 2. Telegram auto-linker treats remaining bare text as URLs ## Changes ### Primary fix: suppress auto-linkified file refs in buildTelegramLink - Added `isAutoLinkedFileRef()` helper that detects when linkify auto-generated a link from a bare filename (href = "http://" + label) - Rejects paths with domain-like segments (dots in non-final path parts) - Modified `buildTelegramLink()` to return null for these, so file refs stay as plain text and get wrapped in `<code>` by the wrapper ### Safety-net: de-linkify in wrapFileReferencesInHtml - Added pre-pass that catches auto-linkified anchors in pre-rendered HTML - Handles edge cases where HTML is passed directly (textMode: "html") - Reuses `isAutoLinkedFileRef()` logic — no duplication ### Bug fixes discovered during review - Fixed `isClosing` bug (line 169): the check `match[1] === "/"` was wrong — the regex `(<\/?)` captures `<` or `</`, so closing tags were never detected. Changed to `match[1] === "</"`. This was causing `inCode/inPre/inAnchor` to stay stuck at true after any opening tag, breaking file ref wrapping after closing tags. - Removed double `wrapFileReferencesInHtml` call: `renderTelegramHtmlText` was calling `markdownToTelegramHtml` (which wraps) then wrapping again. 
### Test coverage (+12 tests, 26 total) - `.sh` filenames (original issue #6932 mentioned backup.sh) - Auto-linkified anchor replacement - Auto-linkified path anchor replacement - Explicit link preservation (different label) - File ref after closing anchor tag (exercises isClosing fix) - Multiple file types in single message - Real URL preservation - Explicit markdown link preservation - File ref after real URL in same message - Chunked output file ref wrapping Closes #6932	2026-02-05 10:47:39 +03:00
divanoli	70f73e6f8d	fix(telegram): auto-wrap file references with TLD extensions to prevent URL previews Telegram's auto-linker aggressively treats filenames like HEARTBEAT.md, README.md, main.go, script.py as URLs and generates domain registrar previews. This fix adds comprehensive protection for file extensions that share TLDs: - High priority: .md, .go, .py, .pl, .ai, .sh - Medium priority: .io, .tv, .fm, .am, .at, .be, .cc, .co Implementation: - Added wrapFileReferencesInHtml() in format.ts - Runs AFTER markdown→HTML conversion - Tokenizes HTML to respect tag boundaries - Skips content inside <code>, <pre>, <a> tags (no nesting issues) - Applied to all rendering paths: renderTelegramHtmlText, markdownToTelegramHtml, markdownToTelegramChunks, and delivery.ts fallback Addresses review comments: - P1: Now handles chunked rendering paths correctly - P2: No longer wraps inside existing code blocks (token-based parsing) - No lookbehinds used (broad Node compatibility) Includes comprehensive test suite in format.wrap-md.test.ts AI-assisted: true	2026-02-04 15:02:54 +03:00