2 Commits

Author SHA1 Message Date
teernisse
36b361a50a fix(search): tag-aware snippet truncation prevents cutting inside <mark> pairs (GIT-5)
The old truncation counted <mark></mark> HTML tags (~13 chars per keyword)
as visible characters, causing over-aggressive truncation. When a cut
landed inside a tag pair, render_snippet would render highlighted text
as muted gray instead of bold yellow.

New truncate_snippet() walks through markup counting only visible
characters, respects tag boundaries, and always closes an open <mark>
before appending ellipsis. Includes 6 unit tests.
2026-03-12 09:28:55 -04:00
teernisse
44431667e8 feat(search): overhaul search output formatting (GIT-5)
Phase 1: Add source_entity_iid to search results via CASE subquery on
hydrate_results() for all 4 source types (issue, MR, discussion, note).
Phase 2: Fix visual alignment - compute indent from prefix visible width.
Phase 3: Show compact relative time on title line.
Phase 4: Add drill-down hint footer (lore issues <iid>).
Phase 5: Move labels to --explain mode, limit snippets to 2 terminal lines.
Phase 6: Use section_divider() for results header.

Also: promote strip_ansi/visible_width to public render utils, update
robot mode --fields minimal search preset with source_entity_iid.
2026-03-12 09:15:34 -04:00
4 changed files with 296 additions and 60 deletions

View File

@@ -1469,7 +1469,7 @@ async fn handle_search(
if robot_mode {
print_search_results_json(&response, elapsed_ms, args.fields.as_deref());
} else {
print_search_results(&response);
print_search_results(&response, explain);
}
Ok(())
}

View File

@@ -1,6 +1,6 @@
use std::collections::HashMap;
use crate::cli::render::Theme;
use crate::cli::render::{self, Theme};
use serde::Serialize;
use crate::Config;
@@ -20,11 +20,16 @@ use crate::search::{
pub struct SearchResultDisplay {
pub document_id: i64,
pub source_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub source_entity_iid: Option<i64>,
pub title: String,
pub url: Option<String>,
pub author: Option<String>,
pub created_at: Option<String>,
pub updated_at: Option<String>,
/// Raw epoch ms for human rendering; not serialized to JSON.
#[serde(skip)]
pub updated_at_ms: Option<i64>,
pub project_path: String,
pub labels: Vec<String>,
pub paths: Vec<String>,
@@ -216,11 +221,13 @@ pub async fn run_search(
results.push(SearchResultDisplay {
document_id: row.document_id,
source_type: row.source_type.clone(),
source_entity_iid: row.source_entity_iid,
title: row.title.clone().unwrap_or_default(),
url: row.url.clone(),
author: row.author.clone(),
created_at: row.created_at.map(ms_to_iso),
updated_at: row.updated_at.map(ms_to_iso),
updated_at_ms: row.updated_at,
project_path: row.project_path.clone(),
labels: row.labels.clone(),
paths: row.paths.clone(),
@@ -242,6 +249,7 @@ pub async fn run_search(
struct HydratedRow {
document_id: i64,
source_type: String,
source_entity_iid: Option<i64>,
title: Option<String>,
url: Option<String>,
author: Option<String>,
@@ -268,7 +276,26 @@ fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<
(SELECT json_group_array(dl.label_name)
FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json,
(SELECT json_group_array(dp.path)
FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json
FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json,
CASE d.source_type
WHEN 'issue' THEN
(SELECT i.iid FROM issues i WHERE i.id = d.source_id)
WHEN 'merge_request' THEN
(SELECT m.iid FROM merge_requests m WHERE m.id = d.source_id)
WHEN 'discussion' THEN
(SELECT COALESCE(
(SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
(SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
) FROM discussions disc WHERE disc.id = d.source_id)
WHEN 'note' THEN
(SELECT COALESCE(
(SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
(SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
) FROM notes n
JOIN discussions disc ON disc.id = n.discussion_id
WHERE n.id = d.source_id)
ELSE NULL
END AS source_entity_iid
FROM json_each(?1) AS j
JOIN documents d ON d.id = j.value
JOIN projects p ON p.id = d.project_id
@@ -293,6 +320,7 @@ fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<
project_path: row.get(8)?,
labels: parse_json_array(&labels_json),
paths: parse_json_array(&paths_json),
source_entity_iid: row.get(11)?,
})
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
@@ -309,6 +337,75 @@ fn parse_json_array(json: &str) -> Vec<String> {
.collect()
}
/// Truncate a snippet to `max_visible` visible characters, respecting `<mark>` tag boundaries.
///
/// Counts only visible text (not tags) toward the limit, and ensures we never cut
/// inside a `<mark>...</mark>` pair (which would break `render_snippet` highlighting).
fn truncate_snippet(snippet: &str, max_visible: usize) -> String {
if max_visible < 4 {
return snippet.to_string();
}
let mut visible_count = 0;
let mut result = String::new();
let mut remaining = snippet;
while !remaining.is_empty() {
if let Some(start) = remaining.find("<mark>") {
// Count visible chars before the tag
let before = &remaining[..start];
let before_len = before.chars().count();
if visible_count + before_len >= max_visible.saturating_sub(3) {
// Truncate within the pre-tag text
let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
let truncated: String = before.chars().take(take).collect();
result.push_str(&truncated);
result.push_str("...");
return result;
}
result.push_str(before);
visible_count += before_len;
// Find matching </mark>
let after_open = &remaining[start + 6..];
if let Some(end) = after_open.find("</mark>") {
let highlighted = &after_open[..end];
let hl_len = highlighted.chars().count();
if visible_count + hl_len >= max_visible.saturating_sub(3) {
// Truncate within the highlighted text
let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
let truncated: String = highlighted.chars().take(take).collect();
result.push_str("<mark>");
result.push_str(&truncated);
result.push_str("</mark>...");
return result;
}
result.push_str(&remaining[start..start + 6 + end + 7]);
visible_count += hl_len;
remaining = &after_open[end + 7..];
} else {
// Unclosed <mark> — treat rest as plain text
result.push_str(&remaining[start..]);
break;
}
} else {
// No more tags — handle remaining plain text
let rest_len = remaining.chars().count();
if visible_count + rest_len > max_visible && max_visible > 3 {
let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
let truncated: String = remaining.chars().take(take).collect();
result.push_str(&truncated);
result.push_str("...");
return result;
}
result.push_str(remaining);
break;
}
}
result
}
/// Render FTS snippet with `<mark>` tags as terminal highlight style.
fn render_snippet(snippet: &str) -> String {
let mut result = String::new();
@@ -326,7 +423,7 @@ fn render_snippet(snippet: &str) -> String {
result
}
pub fn print_search_results(response: &SearchResponse) {
pub fn print_search_results(response: &SearchResponse, explain: bool) {
if !response.warnings.is_empty() {
for w in &response.warnings {
eprintln!("{} {}", Theme::warning().render("Warning:"), w);
@@ -341,11 +438,13 @@ pub fn print_search_results(response: &SearchResponse) {
return;
}
// Phase 6: section divider header
println!(
"\n {} results for '{}' {}",
Theme::bold().render(&response.total_results.to_string()),
Theme::bold().render(&response.query),
Theme::muted().render(&response.mode)
"{}",
render::section_divider(&format!(
"{} results for '{}' {}",
response.total_results, response.query, response.mode
))
);
for (i, result) in response.results.iter().enumerate() {
@@ -359,52 +458,101 @@ pub fn print_search_results(response: &SearchResponse) {
_ => Theme::muted().render(&format!("{:>5}", &result.source_type)),
};
// Title line: rank, type badge, title
println!(
" {:>3}. {} {}",
Theme::muted().render(&(i + 1).to_string()),
type_badge,
Theme::bold().render(&result.title)
);
// Phase 1: entity ref (e.g. #42 or !99)
let entity_ref = result
.source_entity_iid
.map(|iid| match result.source_type.as_str() {
"issue" | "discussion" | "note" => Theme::issue_ref().render(&format!("#{iid}")),
"merge_request" => Theme::mr_ref().render(&format!("!{iid}")),
_ => String::new(),
});
// Metadata: project, author, labels — compact middle-dot line
// Phase 3: relative time
let time_str = result
.updated_at_ms
.map(|ms| Theme::dim().render(&render::format_relative_time_compact(ms)));
// Phase 2: build prefix, compute indent from its visible width
let prefix = format!(" {:>3}. {} ", i + 1, type_badge);
let indent = " ".repeat(render::visible_width(&prefix));
// Title line: rank, type badge, entity ref, title, relative time
let mut title_line = prefix;
if let Some(ref eref) = entity_ref {
title_line.push_str(eref);
title_line.push_str(" ");
}
title_line.push_str(&Theme::bold().render(&result.title));
if let Some(ref time) = time_str {
title_line.push_str(" ");
title_line.push_str(time);
}
println!("{title_line}");
// Metadata: project, author — compact middle-dot line
let sep = Theme::muted().render(" \u{b7} ");
let mut meta_parts: Vec<String> = Vec::new();
meta_parts.push(Theme::muted().render(&result.project_path));
if let Some(ref author) = result.author {
meta_parts.push(Theme::username().render(&format!("@{author}")));
}
if !result.labels.is_empty() {
let label_str = if result.labels.len() <= 3 {
result.labels.join(", ")
} else {
format!(
"{} +{}",
result.labels[..2].join(", "),
result.labels.len() - 2
)
};
meta_parts.push(Theme::muted().render(&label_str));
}
println!(" {}", meta_parts.join(&sep));
println!("{indent}{}", meta_parts.join(&sep));
// Snippet with highlight styling
let rendered = render_snippet(&result.snippet);
println!(" {rendered}");
// Phase 5: limit snippet to ~2 terminal lines.
// Truncate based on visible text length (excluding <mark></mark> tags)
// to avoid cutting inside a highlight tag pair.
let max_snippet_width =
render::terminal_width().saturating_sub(render::visible_width(&indent));
let max_snippet_chars = max_snippet_width.saturating_mul(2);
let snippet = truncate_snippet(&result.snippet, max_snippet_chars);
let rendered = render_snippet(&snippet);
println!("{indent}{rendered}");
if let Some(ref explain) = result.explain {
println!(
" {} vec={} fts={} rrf={:.4}",
if let Some(ref explain_data) = result.explain {
let mut explain_line = format!(
"{indent}{} vec={} fts={} rrf={:.4}",
Theme::accent().render("explain"),
explain
explain_data
.vector_rank
.map(|r| r.to_string())
.unwrap_or_else(|| "-".into()),
explain
explain_data
.fts_rank
.map(|r| r.to_string())
.unwrap_or_else(|| "-".into()),
explain.rrf_score
explain_data.rrf_score
);
// Phase 5: labels shown only in explain mode
if explain && !result.labels.is_empty() {
let label_str = if result.labels.len() <= 3 {
result.labels.join(", ")
} else {
format!(
"{} +{}",
result.labels[..2].join(", "),
result.labels.len() - 2
)
};
explain_line.push_str(&format!(" {}", Theme::muted().render(&label_str)));
}
println!("{explain_line}");
}
}
// Phase 4: drill-down hint footer
if let Some(first) = response.results.first()
&& let Some(iid) = first.source_entity_iid
{
let cmd = match first.source_type.as_str() {
"issue" | "discussion" | "note" => Some(format!("lore issues {iid}")),
"merge_request" => Some(format!("lore mrs {iid}")),
_ => None,
};
if let Some(cmd) = cmd {
println!(
"\n {} {}",
Theme::dim().render("Tip:"),
Theme::dim().render(&format!("{cmd} for details"))
);
}
}
@@ -444,3 +592,64 @@ pub fn print_search_results_json(
Err(e) => eprintln!("Error serializing to JSON: {e}"),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn truncate_snippet_short_text_unchanged() {
let s = "hello world";
assert_eq!(truncate_snippet(s, 100), "hello world");
}
#[test]
fn truncate_snippet_plain_text_truncated() {
let s = "this is a long string that exceeds the limit";
let result = truncate_snippet(s, 20);
assert!(result.ends_with("..."), "got: {result}");
// Visible chars should be <= 20
assert!(result.chars().count() <= 20, "got: {result}");
}
#[test]
fn truncate_snippet_preserves_mark_tags() {
let s = "some text <mark>keyword</mark> and more text here that is long";
let result = truncate_snippet(s, 30);
// Should not cut inside a <mark> pair
let open_count = result.matches("<mark>").count();
let close_count = result.matches("</mark>").count();
assert_eq!(
open_count, close_count,
"unbalanced tags in: {result}"
);
}
#[test]
fn truncate_snippet_cuts_before_mark_tag() {
let s = "a]very long prefix that exceeds the limit <mark>word</mark>";
let result = truncate_snippet(s, 15);
assert!(result.ends_with("..."), "got: {result}");
// The <mark> tag should not appear since we truncated before reaching it
assert!(
!result.contains("<mark>"),
"should not include tag: {result}"
);
}
#[test]
fn truncate_snippet_does_not_count_tags_as_visible() {
// With tags, raw length is 42 chars. Without tags, visible is 29.
let s = "prefix <mark>keyword</mark> suffix text";
// If max_visible = 35, the visible text (29 chars) fits — should NOT truncate
let result = truncate_snippet(s, 35);
assert_eq!(result, s, "should not truncate when visible text fits");
}
#[test]
fn truncate_snippet_small_limit_returns_as_is() {
let s = "text <mark>x</mark>";
// Very small limit should return as-is (guard clause)
assert_eq!(truncate_snippet(s, 3), s);
}
}

View File

@@ -569,6 +569,32 @@ pub fn terminal_width() -> usize {
80
}
/// Strip ANSI escape codes (SGR sequences) from a string.
pub fn strip_ansi(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let mut chars = s.chars();
while let Some(c) = chars.next() {
if c == '\x1b' {
// Consume `[`, then digits/semicolons, then the final letter
if chars.next() == Some('[') {
for c in chars.by_ref() {
if c.is_ascii_alphabetic() {
break;
}
}
}
} else {
out.push(c);
}
}
out
}
/// Compute the visible width of a string that may contain ANSI escape sequences.
pub fn visible_width(s: &str) -> usize {
strip_ansi(s).chars().count()
}
/// Truncate a string to `max` characters, appending "..." if truncated.
pub fn truncate(s: &str, max: usize) -> String {
if max < 4 {
@@ -1459,24 +1485,19 @@ mod tests {
// ── helpers ──
/// Strip ANSI escape codes (SGR sequences) for content assertions.
/// Delegate to the public `strip_ansi` for test assertions.
fn strip_ansi(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let mut chars = s.chars();
while let Some(c) = chars.next() {
if c == '\x1b' {
// Consume `[`, then digits/semicolons, then the final letter
if chars.next() == Some('[') {
for c in chars.by_ref() {
if c.is_ascii_alphabetic() {
break;
}
}
}
} else {
out.push(c);
}
}
out
super::strip_ansi(s)
}
#[test]
fn visible_width_strips_ansi() {
let styled = "\x1b[1mhello\x1b[0m".to_string();
assert_eq!(super::visible_width(&styled), 5);
}
#[test]
fn visible_width_plain_string() {
assert_eq!(super::visible_width("hello"), 5);
}
}

View File

@@ -56,10 +56,16 @@ pub fn expand_fields_preset(fields: &[String], entity: &str) -> Vec<String> {
.iter()
.map(|s| (*s).to_string())
.collect(),
"search" => ["document_id", "title", "source_type", "score"]
.iter()
.map(|s| (*s).to_string())
.collect(),
"search" => [
"document_id",
"title",
"source_type",
"source_entity_iid",
"score",
]
.iter()
.map(|s| (*s).to_string())
.collect(),
"timeline" => ["timestamp", "type", "entity_iid", "detail"]
.iter()
.map(|s| (*s).to_string())