fix(search): tag-aware snippet truncation prevents cutting inside <mark> pairs (GIT-5)

The old truncation counted <mark></mark> HTML tags (~13 chars per keyword) as visible characters, causing over-aggressive truncation. When a cut landed inside a tag pair, render_snippet would render highlighted text as muted gray instead of bold yellow. New truncate_snippet() walks through markup counting only visible characters, respects tag boundaries, and always closes an open <mark> before appending ellipsis. Includes 6 unit tests.
2026-03-12 09:15:34 -04:00
parent 44431667e8
commit 36b361a50a
1 changed files with 134 additions and 8 deletions
--- a/src/cli/commands/search.rs
+++ b/src/cli/commands/search.rs
@@ -337,6 +337,75 @@ fn parse_json_array(json: &str) -> Vec<String> {
        .collect()
 }

+/// Truncate a snippet to `max_visible` visible characters, respecting `<mark>` tag boundaries.
+///
+/// Only text counts toward the limit; the tags of a complete `<mark>...</mark>` pair are free.
+/// A snippet whose visible text already fits is returned unchanged. Otherwise the result is
+/// the first `max_visible - 3` visible characters followed by `...`; a highlight that gets
+/// cut is re-closed with `</mark>` before the ellipsis so a pair is never left half-open.
+///
+/// A `<mark>` with no matching `</mark>` is treated as literal text and counted as visible.
+/// Limits below 4 leave no room for text plus the ellipsis, so the snippet is returned as-is.
+///
+/// Truncation works on `char`s, so multi-byte UTF-8 text is never split mid-character.
+fn truncate_snippet(snippet: &str, max_visible: usize) -> String {
+    const OPEN: &str = "<mark>";
+    const CLOSE: &str = "</mark>";
+    const ELLIPSIS: &str = "...";
+
+    if max_visible <= ELLIPSIS.len() {
+        return snippet.to_string();
+    }
+
+    // Split the snippet into (is_highlighted, text) segments so tags never need to be
+    // counted or cut: plain text alternates with the contents of complete `<mark>` pairs.
+    let mut segments: Vec<(bool, &str)> = Vec::new();
+    let mut rest = snippet;
+    while let Some(start) = rest.find(OPEN) {
+        let after_open = &rest[start + OPEN.len()..];
+        match after_open.find(CLOSE) {
+            Some(end) => {
+                segments.push((false, &rest[..start]));
+                segments.push((true, &after_open[..end]));
+                rest = &after_open[end + CLOSE.len()..];
+            }
+            // Unclosed `<mark>`: everything left is plain text.
+            None => break,
+        }
+    }
+    segments.push((false, rest));
+
+    // Decide on the whole visible length, not per segment, so text that fits is never
+    // shortened just because a tag happens to sit near the limit.
+    let visible_total: usize = segments.iter().map(|(_, text)| text.chars().count()).sum();
+    if visible_total <= max_visible {
+        return snippet.to_string();
+    }
+
+    // Reserve room for the ellipsis; `max_visible > ELLIPSIS.len()` was checked above.
+    let mut budget = max_visible - ELLIPSIS.len();
+    let mut result = String::with_capacity(snippet.len());
+    for (highlighted, text) in segments {
+        if budget == 0 {
+            break;
+        }
+        // Byte offset just past the last character that still fits (char-boundary safe).
+        let cut = text.char_indices().nth(budget).map_or(text.len(), |(idx, _)| idx);
+        let kept = &text[..cut];
+        budget -= kept.chars().count();
+        // Re-wrap highlighted text so a cut highlight is still closed; skip empty pairs.
+        if highlighted && !kept.is_empty() {
+            result.push_str(OPEN);
+            result.push_str(kept);
+            result.push_str(CLOSE);
+        } else {
+            result.push_str(kept);
+        }
+    }
+    result.push_str(ELLIPSIS);
+    result
+}
+
 /// Render FTS snippet with `<mark>` tags as terminal highlight style.
 fn render_snippet(snippet: &str) -> String {
    let mut result = String::new();
@@ -429,17 +498,13 @@ pub fn print_search_results(response: &SearchResponse, explain: bool) {
        }
        println!("{indent}{}", meta_parts.join(&sep));

-        // Phase 5: limit snippet to ~2 terminal lines
+        // Phase 5: limit snippet to ~2 terminal lines.
+        // Truncate based on visible text length (excluding <mark></mark> tags)
+        // to avoid cutting inside a highlight tag pair.
        let max_snippet_width =
            render::terminal_width().saturating_sub(render::visible_width(&indent));
        let max_snippet_chars = max_snippet_width.saturating_mul(2);
-        let snippet = if result.snippet.chars().count() > max_snippet_chars && max_snippet_chars > 3
-        {
-            let truncated: String = result.snippet.chars().take(max_snippet_chars - 3).collect();
-            format!("{truncated}...")
-        } else {
-            result.snippet.clone()
-        };
+        let snippet = truncate_snippet(&result.snippet, max_snippet_chars);
        let rendered = render_snippet(&snippet);
        println!("{indent}{rendered}");

@@ -527,3 +592,64 @@ pub fn print_search_results_json(
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn truncate_snippet_short_text_unchanged() {
+        let s = "hello world";
+        assert_eq!(truncate_snippet(s, 100), "hello world");
+    }
+
+    #[test]
+    fn truncate_snippet_plain_text_truncated() {
+        let s = "this is a long string that exceeds the limit";
+        let result = truncate_snippet(s, 20);
+        assert!(result.ends_with("..."), "got: {result}");
+        // No tags in this input, so total length equals visible length; it must be <= 20
+        assert!(result.chars().count() <= 20, "got: {result}");
+    }
+
+    #[test]
+    fn truncate_snippet_preserves_mark_tags() {
+        let s = "some text <mark>keyword</mark> and more text here that is long";
+        let result = truncate_snippet(s, 30);
+        // Should not cut inside a <mark> pair: every opening tag needs its closing tag
+        let open_count = result.matches("<mark>").count();
+        let close_count = result.matches("</mark>").count();
+        assert_eq!(
+            open_count, close_count,
+            "unbalanced tags in: {result}"
+        );
+    }
+
+    #[test]
+    fn truncate_snippet_cuts_before_mark_tag() {
+        let s = "a]very long prefix that exceeds the limit <mark>word</mark>";
+        let result = truncate_snippet(s, 15);
+        assert!(result.ends_with("..."), "got: {result}");
+        // The <mark> tag should not appear since we truncated before reaching it
+        assert!(
+            !result.contains("<mark>"),
+            "should not include tag: {result}"
+        );
+    }
+
+    #[test]
+    fn truncate_snippet_does_not_count_tags_as_visible() {
+        // With tags, raw length is 39 chars. Without tags, visible is 26.
+        let s = "prefix <mark>keyword</mark> suffix text";
+        // If max_visible = 35, the visible text (26 chars) fits — should NOT truncate
+        let result = truncate_snippet(s, 35);
+        assert_eq!(result, s, "should not truncate when visible text fits");
+    }
+
+    #[test]
+    fn truncate_snippet_small_limit_returns_as_is() {
+        let s = "text <mark>x</mark>";
+        // A limit below 4 hits the guard clause, which returns the input untouched
+        assert_eq!(truncate_snippet(s, 3), s);
+    }
+}