Optimize markdown rendering: skip highlightAuto, fix entity-safe highlighting

Performance: Replace hljs.highlightAuto() fallback with plain escapeHtml()
for unlabeled code blocks. highlightAuto tries every grammar (~6.7ms/block)
while escapeHtml costs ~0.04ms. With thousands of unlabeled blocks in
typical sessions this dominated render time.

Import shared escapeHtml instead of the local duplicate. Import github-dark
highlight.js theme CSS directly.

Fix highlightSearchText to avoid corrupting HTML entities: split text on
entity patterns (&amp; &lt; etc.) before applying search regex, so searching
for 'amp' does not break &amp; into &<mark>amp</mark>;.

Add unit tests for highlightSearchText covering: plain text matches, empty
queries, avoiding matches inside HTML tags, and preserving HTML entities.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-30 01:10:22 -05:00
parent 6a4e22f1f8
commit 9716091ecc
2 changed files with 87 additions and 12 deletions

View File

@@ -0,0 +1,68 @@
// @vitest-environment jsdom
import { describe, it, expect } from "vitest";
import { highlightSearchText } from "./markdown";
/**
 * Spec for highlightSearchText(html, query).
 *
 * Covers three groups of behaviour:
 *  - matches in text content are wrapped in <mark class="search-highlight">;
 *  - inputs where nothing may be wrapped (empty query, a match that exists
 *    only inside a tag attribute, a match that exists only inside an HTML
 *    entity) are returned unchanged;
 *  - matching is case-insensitive and treats the query as literal text.
 */
describe("highlightSearchText", () => {
  // Cases whose expected output is the input itself. Kept in a table so the
  // "must stay untouched" contract reads as one list; array order is the
  // order in which the tests are registered.
  const passthroughCases: ReadonlyArray<{
    title: string;
    html: string;
    query: string;
  }> = [
    {
      title: "returns html unchanged when query is empty",
      html: "<p>hello</p>",
      query: "",
    },
    {
      // "class" appears in the href attribute but must not be highlighted there
      title: "does not match inside HTML tags",
      html: '<a href="class-link">text</a>',
      query: "class",
    },
    {
      // Must NOT produce &<mark>amp</mark>; — entity must remain intact
      title: "does not corrupt HTML entities when searching for entity content",
      html: "<p>A &amp; B</p>",
      query: "amp",
    },
    {
      title: "does not corrupt &lt; entity",
      html: "<p>a &lt; b</p>",
      query: "lt",
    },
    {
      title: "does not corrupt &gt; entity",
      html: "<p>a &gt; b</p>",
      query: "gt",
    },
    {
      title: "does not corrupt numeric entities",
      html: "<p>&#039;quoted&#039;</p>",
      query: "039",
    },
  ];

  it("highlights plain text matches", () => {
    expect(highlightSearchText("<p>hello world</p>", "world")).toBe(
      '<p>hello <mark class="search-highlight">world</mark></p>'
    );
  });

  for (const { title, html, query } of passthroughCases) {
    it(title, () => {
      expect(highlightSearchText(html, query)).toBe(html);
    });
  }

  it("highlights text adjacent to entities", () => {
    // The entity next to the match must survive while "foo" is still wrapped.
    const highlighted = highlightSearchText("<p>foo &amp; bar</p>", "foo");
    expect(highlighted).toBe(
      '<p><mark class="search-highlight">foo</mark> &amp; bar</p>'
    );
  });

  it("is case-insensitive", () => {
    // The original casing of the matched text is kept inside the <mark>.
    const highlighted = highlightSearchText("<p>Hello World</p>", "hello");
    expect(highlighted).toContain(
      '<mark class="search-highlight">Hello</mark>'
    );
  });

  it("escapes regex special characters in query", () => {
    // "$" and "." are regex metacharacters; the query must match literally.
    const highlighted = highlightSearchText("<p>price is $100.00</p>", "$100.00");
    expect(highlighted).toContain(
      '<mark class="search-highlight">$100.00</mark>'
    );
  });
});

View File

@@ -1,6 +1,8 @@
import { marked } from "marked"; import { marked } from "marked";
import hljs from "highlight.js"; import hljs from "highlight.js";
import { markedHighlight } from "marked-highlight"; import { markedHighlight } from "marked-highlight";
import "highlight.js/styles/github-dark.css";
import { escapeHtml } from "../../shared/escape-html";
marked.use( marked.use(
markedHighlight({ markedHighlight({
@@ -8,7 +10,10 @@ marked.use(
if (lang && hljs.getLanguage(lang)) { if (lang && hljs.getLanguage(lang)) {
return hljs.highlight(code, { language: lang }).value; return hljs.highlight(code, { language: lang }).value;
} }
return hljs.highlightAuto(code).value; // Plain-text fallback: highlightAuto tries every grammar (~6.7ms/block)
// vs escapeHtml (~0.04ms). With thousands of unlabeled blocks
// this dominates render time. Escaping is sufficient.
return escapeHtml(code);
}, },
}) })
); );
@@ -22,13 +27,6 @@ export function renderMarkdown(text: string): string {
} }
} }
/**
 * Escapes the three characters that are significant in HTML text content:
 * `&` becomes `&amp;`, `<` becomes `&lt;`, and `>` becomes `&gt;`.
 * Quote characters are left as-is.
 *
 * @param text - Raw text to escape.
 * @returns The text with `&`, `<` and `>` replaced by their entities.
 */
function escapeHtml(text: string): string {
  // Single pass over the input: each special character is mapped directly to
  // its entity, so the "&" of an emitted entity is never re-escaped.
  return text.replace(/[&<>]/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      default:
        return "&gt;";
    }
  });
}
export function highlightSearchText(html: string, query: string): string { export function highlightSearchText(html: string, query: string): string {
if (!query) return html; if (!query) return html;
const escaped = query.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const escaped = query.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
@@ -40,10 +38,19 @@ export function highlightSearchText(html: string, query: string): string {
for (let i = 0; i < parts.length; i++) { for (let i = 0; i < parts.length; i++) {
// Even indices are text content, odd indices are tags // Even indices are text content, odd indices are tags
if (i % 2 === 0 && parts[i]) { if (i % 2 === 0 && parts[i]) {
parts[i] = parts[i].replace( // Further split on HTML entities (&amp; &lt; etc.) to avoid
regex, // matching inside them — e.g. searching "amp" must not corrupt &amp;
'<mark class="search-highlight">$1</mark>' const subParts = parts[i].split(/(&[a-zA-Z0-9#]+;)/);
); for (let j = 0; j < subParts.length; j++) {
// Odd indices are entities — skip them
if (j % 2 === 0 && subParts[j]) {
subParts[j] = subParts[j].replace(
regex,
'<mark class="search-highlight">$1</mark>'
);
}
}
parts[i] = subParts.join("");
} }
} }
return parts.join(""); return parts.join("");