Optimize markdown rendering: skip highlightAuto, fix entity-safe highlighting

Performance: Replace hljs.highlightAuto() fallback with plain escapeHtml() for unlabeled code blocks. highlightAuto tries every grammar (~6.7ms/block) while escapeHtml costs ~0.04ms. With thousands of unlabeled blocks in typical sessions this dominated render time. Import shared escapeHtml instead of the local duplicate. Import github-dark highlight.js theme CSS directly. Fix highlightSearchText to avoid corrupting HTML entities: split text on entity patterns (&amp; &lt; etc.) before applying search regex, so searching for 'amp' does not break &amp; into &<mark>amp</mark>;. Add unit tests for highlightSearchText covering: plain text matches, empty queries, avoiding matches inside HTML tags, and preserving HTML entities. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 01:10:22 -05:00
parent 6a4e22f1f8
commit 9716091ecc
2 changed files with 87 additions and 12 deletions
--- a/src/client/lib/markdown.test.ts
+++ b/src/client/lib/markdown.test.ts
@@ -0,0 +1,68 @@
 // @vitest-environment jsdom
 import { describe, it, expect } from "vitest";
 import { highlightSearchText } from "./markdown";
 describe("highlightSearchText", () => {
  it("highlights plain text matches", () => {
    const result = highlightSearchText("<p>hello world</p>", "world");
    expect(result).toBe(
      '<p>hello <mark class="search-highlight">world</mark></p>'
    );
  });
  it("returns html unchanged when query is empty", () => {
    const html = "<p>hello</p>";
    expect(highlightSearchText(html, "")).toBe(html);
  });
  it("does not match inside HTML tags", () => {
    const html = '<a href="class-link">text</a>';
    const result = highlightSearchText(html, "class");
    // "class" appears in the href attribute but must not be highlighted there
    expect(result).toBe('<a href="class-link">text</a>');
  });
  it("does not corrupt HTML entities when searching for entity content", () => {
    const html = "<p>A &amp; B</p>";
    const result = highlightSearchText(html, "amp");
    // Must NOT produce &<mark>amp</mark>; — entity must remain intact
    expect(result).toBe("<p>A &amp; B</p>");
  });
  it("does not corrupt &lt; entity", () => {
    const html = "<p>a &lt; b</p>";
    const result = highlightSearchText(html, "lt");
    expect(result).toBe("<p>a &lt; b</p>");
  });
  it("does not corrupt &gt; entity", () => {
    const html = "<p>a &gt; b</p>";
    const result = highlightSearchText(html, "gt");
    expect(result).toBe("<p>a &gt; b</p>");
  });
  it("does not corrupt numeric entities", () => {
    const html = "<p>&#039;quoted&#039;</p>";
    const result = highlightSearchText(html, "039");
    expect(result).toBe("<p>&#039;quoted&#039;</p>");
  });
  it("highlights text adjacent to entities", () => {
    const html = "<p>foo &amp; bar</p>";
    const result = highlightSearchText(html, "foo");
    expect(result).toBe(
      '<p><mark class="search-highlight">foo</mark> &amp; bar</p>'
    );
  });
  it("is case-insensitive", () => {
    const result = highlightSearchText("<p>Hello World</p>", "hello");
    expect(result).toContain('<mark class="search-highlight">Hello</mark>');
  });
  it("escapes regex special characters in query", () => {
    const html = "<p>price is $100.00</p>";
    const result = highlightSearchText(html, "$100.00");
    expect(result).toContain('<mark class="search-highlight">$100.00</mark>');
  });
 });
--- a/src/client/lib/markdown.ts
+++ b/src/client/lib/markdown.ts
@@ -1,6 +1,8 @@
 import { marked } from "marked";
 import hljs from "highlight.js";
 import { markedHighlight } from "marked-highlight";
 import "highlight.js/styles/github-dark.css";
 import { escapeHtml } from "../../shared/escape-html";
 marked.use(
  markedHighlight({
@@ -8,7 +10,10 @@ marked.use(
      if (lang && hljs.getLanguage(lang)) {
        return hljs.highlight(code, { language: lang }).value;
      }
-      return hljs.highlightAuto(code).value;
+      // Plain-text fallback: highlightAuto tries every grammar (~6.7ms/block)
      // vs explicit highlight (~0.04ms). With thousands of unlabeled blocks
      // this dominates render time. Escaping is sufficient.
      return escapeHtml(code);
    },
  })
 );
@@ -22,13 +27,6 @@ export function renderMarkdown(text: string): string {
  }
 }
 function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
 }
 export function highlightSearchText(html: string, query: string): string {
  if (!query) return html;
  const escaped = query.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
@@ -40,10 +38,19 @@ export function highlightSearchText(html: string, query: string): string {
  for (let i = 0; i < parts.length; i++) {
    // Even indices are text content, odd indices are tags
    if (i % 2 === 0 && parts[i]) {
-      parts[i] = parts[i].replace(
-        regex,
-        '<mark class="search-highlight">$1</mark>'
-      );
+      // Further split on HTML entities (&amp; &lt; etc.) to avoid
+      // matching inside them — e.g. searching "amp" must not corrupt &amp;
+      const subParts = parts[i].split(/(&[a-zA-Z0-9#]+;)/);
+      for (let j = 0; j < subParts.length; j++) {
        // Odd indices are entities — skip them
        if (j % 2 === 0 && subParts[j]) {
          subParts[j] = subParts[j].replace(
            regex,
            '<mark class="search-highlight">$1</mark>'
          );
        }
      }
      parts[i] = subParts.join("");
    }
  }
  return parts.join("");