fix(search): tag-aware snippet truncation prevents cutting inside <mark> pairs (GIT-5)

The old truncation counted <mark></mark> HTML tags (13 chars per highlighted keyword) as visible characters, causing over-aggressive truncation. When a cut landed inside a tag pair, render_snippet would render the highlighted text as muted gray instead of bold yellow. The new truncate_snippet() walks through the markup counting only visible characters, respects tag boundaries, and always closes an open <mark> before appending the ellipsis. Includes 6 unit tests.
feat(search): overhaul search output formatting (GIT-5)
2026-03-12 09:28:55 -04:00 · 2026-03-12 09:15:34 -04:00
4 changed files with 296 additions and 60 deletions
--- a/src/app/handlers.rs
+++ b/src/app/handlers.rs
@@ -1469,7 +1469,7 @@ async fn handle_search(
    if robot_mode {
        print_search_results_json(&response, elapsed_ms, args.fields.as_deref());
    } else {
-        print_search_results(&response);
+        print_search_results(&response, explain);
    }
    Ok(())
 }
--- a/src/cli/commands/search.rs
+++ b/src/cli/commands/search.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;

-use crate::cli::render::Theme;
+use crate::cli::render::{self, Theme};
 use serde::Serialize;

 use crate::Config;
@@ -20,11 +20,16 @@ use crate::search::{
 pub struct SearchResultDisplay {
    pub document_id: i64,
    pub source_type: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub source_entity_iid: Option<i64>,
    pub title: String,
    pub url: Option<String>,
    pub author: Option<String>,
    pub created_at: Option<String>,
    pub updated_at: Option<String>,
+    /// Raw epoch ms for human rendering; not serialized to JSON.
+    #[serde(skip)]
+    pub updated_at_ms: Option<i64>,
    pub project_path: String,
    pub labels: Vec<String>,
    pub paths: Vec<String>,
@@ -216,11 +221,13 @@ pub async fn run_search(
        results.push(SearchResultDisplay {
            document_id: row.document_id,
            source_type: row.source_type.clone(),
+            source_entity_iid: row.source_entity_iid,
            title: row.title.clone().unwrap_or_default(),
            url: row.url.clone(),
            author: row.author.clone(),
            created_at: row.created_at.map(ms_to_iso),
            updated_at: row.updated_at.map(ms_to_iso),
+            updated_at_ms: row.updated_at,
            project_path: row.project_path.clone(),
            labels: row.labels.clone(),
            paths: row.paths.clone(),
@@ -242,6 +249,7 @@ pub async fn run_search(
 struct HydratedRow {
    document_id: i64,
    source_type: String,
+    source_entity_iid: Option<i64>,
    title: Option<String>,
    url: Option<String>,
    author: Option<String>,
@@ -268,7 +276,26 @@ fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<
               (SELECT json_group_array(dl.label_name)
                FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json,
               (SELECT json_group_array(dp.path)
-                FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json
+                FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json,
+               CASE d.source_type
+                 WHEN 'issue' THEN
+                   (SELECT i.iid FROM issues i WHERE i.id = d.source_id)
+                 WHEN 'merge_request' THEN
+                   (SELECT m.iid FROM merge_requests m WHERE m.id = d.source_id)
+                 WHEN 'discussion' THEN
+                   (SELECT COALESCE(
+                     (SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
+                     (SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
+                   ) FROM discussions disc WHERE disc.id = d.source_id)
+                 WHEN 'note' THEN
+                   (SELECT COALESCE(
+                     (SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
+                     (SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
+                   ) FROM notes n
+                   JOIN discussions disc ON disc.id = n.discussion_id
+                   WHERE n.id = d.source_id)
+                 ELSE NULL
+               END AS source_entity_iid
        FROM json_each(?1) AS j
        JOIN documents d ON d.id = j.value
        JOIN projects p ON p.id = d.project_id
@@ -293,6 +320,7 @@ fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<
                project_path: row.get(8)?,
                labels: parse_json_array(&labels_json),
                paths: parse_json_array(&paths_json),
+                source_entity_iid: row.get(11)?,
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;
@@ -309,6 +337,75 @@ fn parse_json_array(json: &str) -> Vec<String> {
        .collect()
 }

+/// Truncate a snippet to `max_visible` visible characters, respecting `<mark>` tag boundaries.
+///
+/// Counts only visible text (not tags) toward the limit. When the cut falls inside
+/// highlighted text, `</mark>` is emitted before `...` so `render_snippet` gets balanced tags.
+fn truncate_snippet(snippet: &str, max_visible: usize) -> String {
+    if max_visible < 4 {
+        return snippet.to_string();
+    }
+
+    let mut visible_count = 0;
+    let mut result = String::new();
+    let mut remaining = snippet;
+
+    while !remaining.is_empty() {
+        if let Some(start) = remaining.find("<mark>") {
+            // Count visible chars before the tag
+            let before = &remaining[..start];
+            let before_len = before.chars().count();
+            if visible_count + before_len >= max_visible.saturating_sub(3) {
+                // Truncate within the pre-tag text
+                let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
+                let truncated: String = before.chars().take(take).collect();
+                result.push_str(&truncated);
+                result.push_str("...");
+                return result;
+            }
+            result.push_str(before);
+            visible_count += before_len;
+
+            // Find matching </mark>
+            let after_open = &remaining[start + 6..];
+            if let Some(end) = after_open.find("</mark>") {
+                let highlighted = &after_open[..end];
+                let hl_len = highlighted.chars().count();
+                if visible_count + hl_len >= max_visible.saturating_sub(3) {
+                    // Truncate within the highlighted text
+                    let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
+                    let truncated: String = highlighted.chars().take(take).collect();
+                    result.push_str("<mark>");
+                    result.push_str(&truncated);
+                    result.push_str("</mark>...");
+                    return result;
+                }
+                result.push_str(&remaining[start..start + 6 + end + 7]);
+                visible_count += hl_len;
+                remaining = &after_open[end + 7..];
+            } else {
+                // Unclosed <mark> — append the rest verbatim (tag included, no truncation)
+                result.push_str(&remaining[start..]);
+                break;
+            }
+        } else {
+            // No more tags — handle remaining plain text
+            let rest_len = remaining.chars().count();
+            if visible_count + rest_len > max_visible && max_visible > 3 {
+                let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
+                let truncated: String = remaining.chars().take(take).collect();
+                result.push_str(&truncated);
+                result.push_str("...");
+                return result;
+            }
+            result.push_str(remaining);
+            break;
+        }
+    }
+
+    result
+}
+
 /// Render FTS snippet with `<mark>` tags as terminal highlight style.
 fn render_snippet(snippet: &str) -> String {
    let mut result = String::new();
@@ -326,7 +423,7 @@ fn render_snippet(snippet: &str) -> String {
    result
 }

-pub fn print_search_results(response: &SearchResponse) {
+pub fn print_search_results(response: &SearchResponse, explain: bool) {
    if !response.warnings.is_empty() {
        for w in &response.warnings {
            eprintln!("{} {}", Theme::warning().render("Warning:"), w);
@@ -341,11 +438,13 @@ pub fn print_search_results(response: &SearchResponse) {
        return;
    }

+    // Phase 6: section divider header
    println!(
-        "\n   {} results for '{}'  {}",
-        Theme::bold().render(&response.total_results.to_string()),
-        Theme::bold().render(&response.query),
-        Theme::muted().render(&response.mode)
+        "{}",
+        render::section_divider(&format!(
+            "{} results for '{}'  {}",
+            response.total_results, response.query, response.mode
+        ))
    );

    for (i, result) in response.results.iter().enumerate() {
@@ -359,52 +458,101 @@ pub fn print_search_results(response: &SearchResponse) {
            _ => Theme::muted().render(&format!("{:>5}", &result.source_type)),
        };

-        // Title line: rank, type badge, title
-        println!(
-            "  {:>3}.  {}  {}",
-            Theme::muted().render(&(i + 1).to_string()),
-            type_badge,
-            Theme::bold().render(&result.title)
-        );
+        // Phase 1: entity ref (e.g. #42 or !99)
+        let entity_ref = result
+            .source_entity_iid
+            .map(|iid| match result.source_type.as_str() {
+                "issue" | "discussion" | "note" => Theme::issue_ref().render(&format!("#{iid}")),
+                "merge_request" => Theme::mr_ref().render(&format!("!{iid}")),
+                _ => String::new(),
+            });

-        // Metadata: project, author, labels — compact middle-dot line
+        // Phase 3: relative time
+        let time_str = result
+            .updated_at_ms
+            .map(|ms| Theme::dim().render(&render::format_relative_time_compact(ms)));
+
+        // Phase 2: build prefix, compute indent from its visible width
+        let prefix = format!("  {:>3}.  {}  ", i + 1, type_badge);
+        let indent = " ".repeat(render::visible_width(&prefix));
+
+        // Title line: rank, type badge, entity ref, title, relative time
+        let mut title_line = prefix;
+        if let Some(ref eref) = entity_ref {
+            title_line.push_str(eref);
+            title_line.push_str("  ");
+        }
+        title_line.push_str(&Theme::bold().render(&result.title));
+        if let Some(ref time) = time_str {
+            title_line.push_str("  ");
+            title_line.push_str(time);
+        }
+        println!("{title_line}");
+
+        // Metadata: project, author — compact middle-dot line
        let sep = Theme::muted().render(" \u{b7} ");
        let mut meta_parts: Vec<String> = Vec::new();
        meta_parts.push(Theme::muted().render(&result.project_path));
        if let Some(ref author) = result.author {
            meta_parts.push(Theme::username().render(&format!("@{author}")));
        }
-        if !result.labels.is_empty() {
-            let label_str = if result.labels.len() <= 3 {
-                result.labels.join(", ")
-            } else {
-                format!(
-                    "{} +{}",
-                    result.labels[..2].join(", "),
-                    result.labels.len() - 2
-                )
-            };
-            meta_parts.push(Theme::muted().render(&label_str));
-        }
-        println!("        {}", meta_parts.join(&sep));
+        println!("{indent}{}", meta_parts.join(&sep));

-        // Snippet with highlight styling
-        let rendered = render_snippet(&result.snippet);
-        println!("        {rendered}");
+        // Phase 5: limit snippet to ~2 terminal lines.
+        // Truncate based on visible text length (excluding <mark></mark> tags)
+        // to avoid cutting inside a highlight tag pair.
+        let max_snippet_width =
+            render::terminal_width().saturating_sub(render::visible_width(&indent));
+        let max_snippet_chars = max_snippet_width.saturating_mul(2);
+        let snippet = truncate_snippet(&result.snippet, max_snippet_chars);
+        let rendered = render_snippet(&snippet);
+        println!("{indent}{rendered}");

-        if let Some(ref explain) = result.explain {
-            println!(
-                "        {} vec={} fts={} rrf={:.4}",
+        if let Some(ref explain_data) = result.explain {
+            let mut explain_line = format!(
+                "{indent}{} vec={} fts={} rrf={:.4}",
                Theme::accent().render("explain"),
-                explain
+                explain_data
                    .vector_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
-                explain
+                explain_data
                    .fts_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
-                explain.rrf_score
+                explain_data.rrf_score
+            );
+            // Phase 5: labels shown only in explain mode
+            if explain && !result.labels.is_empty() {
+                let label_str = if result.labels.len() <= 3 {
+                    result.labels.join(", ")
+                } else {
+                    format!(
+                        "{} +{}",
+                        result.labels[..2].join(", "),
+                        result.labels.len() - 2
+                    )
+                };
+                explain_line.push_str(&format!("  {}", Theme::muted().render(&label_str)));
+            }
+            println!("{explain_line}");
+        }
+    }
+
+    // Phase 4: drill-down hint footer
+    if let Some(first) = response.results.first()
+        && let Some(iid) = first.source_entity_iid
+    {
+        let cmd = match first.source_type.as_str() {
+            "issue" | "discussion" | "note" => Some(format!("lore issues {iid}")),
+            "merge_request" => Some(format!("lore mrs {iid}")),
+            _ => None,
+        };
+        if let Some(cmd) = cmd {
+            println!(
+                "\n  {} {}",
+                Theme::dim().render("Tip:"),
+                Theme::dim().render(&format!("{cmd} for details"))
            );
        }
    }
@@ -444,3 +592,64 @@ pub fn print_search_results_json(
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn truncate_snippet_short_text_unchanged() {
+        let s = "hello world";
+        assert_eq!(truncate_snippet(s, 100), "hello world");
+    }
+
+    #[test]
+    fn truncate_snippet_plain_text_truncated() {
+        let s = "this is a long string that exceeds the limit";
+        let result = truncate_snippet(s, 20);
+        assert!(result.ends_with("..."), "got: {result}");
+        // Visible chars should be <= 20
+        assert!(result.chars().count() <= 20, "got: {result}");
+    }
+
+    #[test]
+    fn truncate_snippet_preserves_mark_tags() {
+        let s = "some text <mark>keyword</mark> and more text here that is long";
+        let result = truncate_snippet(s, 30);
+        // Should not cut inside a <mark> pair
+        let open_count = result.matches("<mark>").count();
+        let close_count = result.matches("</mark>").count();
+        assert_eq!(
+            open_count, close_count,
+            "unbalanced tags in: {result}"
+        );
+    }
+
+    #[test]
+    fn truncate_snippet_cuts_before_mark_tag() {
+        let s = "a]very long prefix that exceeds the limit <mark>word</mark>";
+        let result = truncate_snippet(s, 15);
+        assert!(result.ends_with("..."), "got: {result}");
+        // The <mark> tag should not appear since we truncated before reaching it
+        assert!(
+            !result.contains("<mark>"),
+            "should not include tag: {result}"
+        );
+    }
+
+    #[test]
+    fn truncate_snippet_does_not_count_tags_as_visible() {
+        // With tags, raw length is 39 chars. Without tags, visible is 26.
+        let s = "prefix <mark>keyword</mark> suffix text";
+        // If max_visible = 35, the visible text (26 chars) fits — should NOT truncate
+        let result = truncate_snippet(s, 35);
+        assert_eq!(result, s, "should not truncate when visible text fits");
+    }
+
+    #[test]
+    fn truncate_snippet_small_limit_returns_as_is() {
+        let s = "text <mark>x</mark>";
+        // Very small limit should return as-is (guard clause)
+        assert_eq!(truncate_snippet(s, 3), s);
+    }
+}
--- a/src/cli/render.rs
+++ b/src/cli/render.rs
@@ -569,6 +569,32 @@ pub fn terminal_width() -> usize {
    80
 }

+/// Strip ANSI escape codes (SGR sequences) from a string.
+pub fn strip_ansi(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut chars = s.chars();
+    while let Some(c) = chars.next() {
+        if c == '\x1b' {
+            // Consume `[`, then digits/semicolons, then the final letter
+            if chars.next() == Some('[') {
+                for c in chars.by_ref() {
+                    if c.is_ascii_alphabetic() {
+                        break;
+                    }
+                }
+            }
+        } else {
+            out.push(c);
+        }
+    }
+    out
+}
+
+/// Compute the visible width of a string that may contain ANSI escape sequences.
+pub fn visible_width(s: &str) -> usize {
+    strip_ansi(s).chars().count()
+}
+
 /// Truncate a string to `max` characters, appending "..." if truncated.
 pub fn truncate(s: &str, max: usize) -> String {
    if max < 4 {
@@ -1459,24 +1485,19 @@ mod tests {

    // ── helpers ──

-    /// Strip ANSI escape codes (SGR sequences) for content assertions.
+    /// Delegate to the public `strip_ansi` for test assertions.
    fn strip_ansi(s: &str) -> String {
-        let mut out = String::with_capacity(s.len());
-        let mut chars = s.chars();
-        while let Some(c) = chars.next() {
-            if c == '\x1b' {
-                // Consume `[`, then digits/semicolons, then the final letter
-                if chars.next() == Some('[') {
-                    for c in chars.by_ref() {
-                        if c.is_ascii_alphabetic() {
-                            break;
-                        }
-                    }
-                }
-            } else {
-                out.push(c);
-            }
-        }
-        out
+        super::strip_ansi(s)
+    }
+
+    #[test]
+    fn visible_width_strips_ansi() {
+        let styled = "\x1b[1mhello\x1b[0m".to_string();
+        assert_eq!(super::visible_width(&styled), 5);
+    }
+
+    #[test]
+    fn visible_width_plain_string() {
+        assert_eq!(super::visible_width("hello"), 5);
    }
 }
--- a/src/cli/robot.rs
+++ b/src/cli/robot.rs
@@ -56,10 +56,16 @@ pub fn expand_fields_preset(fields: &[String], entity: &str) -> Vec<String> {
                .iter()
                .map(|s| (*s).to_string())
                .collect(),
-            "search" => ["document_id", "title", "source_type", "score"]
-                .iter()
-                .map(|s| (*s).to_string())
-                .collect(),
+            "search" => [
+                "document_id",
+                "title",
+                "source_type",
+                "source_entity_iid",
+                "score",
+            ]
+            .iter()
+            .map(|s| (*s).to_string())
+            .collect(),
            "timeline" => ["timestamp", "type", "entity_iid", "detail"]
                .iter()
                .map(|s| (*s).to_string())