fix(search): tag-aware snippet truncation prevents cutting inside <mark> pairs (GIT-5)
The old truncation counted <mark></mark> HTML tags (~13 chars per keyword) as visible characters, causing over-aggressive truncation. When a cut landed inside a tag pair, render_snippet would render highlighted text as muted gray instead of bold yellow. New truncate_snippet() walks through markup counting only visible characters, respects tag boundaries, and always closes an open <mark> before appending ellipsis. Includes 6 unit tests.
This commit is contained in:
@@ -337,6 +337,75 @@ fn parse_json_array(json: &str) -> Vec<String> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Truncate a snippet to at most `max_visible` visible characters, respecting `<mark>` tag
/// boundaries.
///
/// Only visible text counts toward the limit; the `<mark>` / `</mark>` tags themselves are
/// free. A snippet whose visible text already fits in `max_visible` is returned unchanged,
/// with no ellipsis. Otherwise the output holds the first `max_visible - 3` visible
/// characters followed by `"..."`, so its visible length is exactly `max_visible`.
///
/// When the cut lands inside a `<mark>...</mark>` pair, the pair is closed before the
/// ellipsis is appended, so the output never contains a dangling `<mark>` that the input
/// did not already have.
///
/// Special cases:
/// - `max_visible < 4` leaves no room for text plus the ellipsis, so the snippet is
///   returned unchanged.
/// - An unclosed `<mark>` (malformed input) is never cut: the tag and everything after it
///   are excluded from the visible count and passed through verbatim when reached.
fn truncate_snippet(snippet: &str, max_visible: usize) -> String {
    const OPEN: &str = "<mark>";
    const CLOSE: &str = "</mark>";
    const ELLIPSIS: &str = "...";

    /// Count the characters of `text` that remain once well-formed `<mark>...</mark>`
    /// pairs are stripped. Counting stops at an unclosed `<mark>`.
    fn visible_len(text: &str) -> usize {
        let mut total = 0;
        let mut rest = text;
        while let Some(open_at) = rest.find(OPEN) {
            total += rest[..open_at].chars().count();
            let after_open = &rest[open_at + OPEN.len()..];
            match after_open.find(CLOSE) {
                Some(close_at) => {
                    total += after_open[..close_at].chars().count();
                    rest = &after_open[close_at + CLOSE.len()..];
                }
                // Unclosed tag: the tail is passed through untouched, so it is not counted.
                None => return total,
            }
        }
        total + rest.chars().count()
    }

    // Nothing to do when the limit cannot hold the ellipsis plus at least one character,
    // or when the visible text already fits. Checking the total up front is what prevents
    // a spurious "..." on snippets that end right at (or just past) the text budget.
    if max_visible <= ELLIPSIS.len() || visible_len(snippet) <= max_visible {
        return snippet.to_string();
    }

    // From here on the visible text is known to overflow, so exactly `budget` visible
    // characters are kept and the ellipsis is always appended.
    let budget = max_visible - ELLIPSIS.len();
    let mut result = String::with_capacity(snippet.len());
    let mut visible_count = 0;
    let mut remaining = snippet;

    // Invariant at the top of each iteration: visible_count < budget.
    while let Some(open_at) = remaining.find(OPEN) {
        // Plain text in front of the next highlight.
        let plain = &remaining[..open_at];
        let plain_len = plain.chars().count();
        if visible_count + plain_len >= budget {
            result.extend(plain.chars().take(budget.saturating_sub(visible_count)));
            result.push_str(ELLIPSIS);
            return result;
        }
        result.push_str(plain);
        visible_count += plain_len;

        let after_open = &remaining[open_at + OPEN.len()..];
        let close_at = match after_open.find(CLOSE) {
            Some(index) => index,
            None => {
                // Defensive: the overflow is always found before an unclosed tag because
                // the tail behind it is excluded from the visible count.
                result.push_str(&remaining[open_at..]);
                return result;
            }
        };

        // Highlighted text: cut inside it if needed, but always emit a balanced pair.
        let highlighted = &after_open[..close_at];
        let highlighted_len = highlighted.chars().count();
        result.push_str(OPEN);
        if visible_count + highlighted_len >= budget {
            result.extend(highlighted.chars().take(budget.saturating_sub(visible_count)));
            result.push_str(CLOSE);
            result.push_str(ELLIPSIS);
            return result;
        }
        result.push_str(highlighted);
        result.push_str(CLOSE);
        visible_count += highlighted_len;
        remaining = &after_open[close_at + CLOSE.len()..];
    }

    // No tags left: the overflow must be in this trailing plain text.
    result.extend(remaining.chars().take(budget.saturating_sub(visible_count)));
    result.push_str(ELLIPSIS);
    result
}
|
||||||
|
|
||||||
/// Render FTS snippet with `<mark>` tags as terminal highlight style.
|
/// Render FTS snippet with `<mark>` tags as terminal highlight style.
|
||||||
fn render_snippet(snippet: &str) -> String {
|
fn render_snippet(snippet: &str) -> String {
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
@@ -429,17 +498,13 @@ pub fn print_search_results(response: &SearchResponse, explain: bool) {
|
|||||||
}
|
}
|
||||||
println!("{indent}{}", meta_parts.join(&sep));
|
println!("{indent}{}", meta_parts.join(&sep));
|
||||||
|
|
||||||
// Phase 5: limit snippet to ~2 terminal lines
|
// Phase 5: limit snippet to ~2 terminal lines.
|
||||||
|
// Truncate based on visible text length (excluding <mark></mark> tags)
|
||||||
|
// to avoid cutting inside a highlight tag pair.
|
||||||
let max_snippet_width =
|
let max_snippet_width =
|
||||||
render::terminal_width().saturating_sub(render::visible_width(&indent));
|
render::terminal_width().saturating_sub(render::visible_width(&indent));
|
||||||
let max_snippet_chars = max_snippet_width.saturating_mul(2);
|
let max_snippet_chars = max_snippet_width.saturating_mul(2);
|
||||||
let snippet = if result.snippet.chars().count() > max_snippet_chars && max_snippet_chars > 3
|
let snippet = truncate_snippet(&result.snippet, max_snippet_chars);
|
||||||
{
|
|
||||||
let truncated: String = result.snippet.chars().take(max_snippet_chars - 3).collect();
|
|
||||||
format!("{truncated}...")
|
|
||||||
} else {
|
|
||||||
result.snippet.clone()
|
|
||||||
};
|
|
||||||
let rendered = render_snippet(&snippet);
|
let rendered = render_snippet(&snippet);
|
||||||
println!("{indent}{rendered}");
|
println!("{indent}{rendered}");
|
||||||
|
|
||||||
@@ -527,3 +592,64 @@ pub fn print_search_results_json(
|
|||||||
Err(e) => eprintln!("Error serializing to JSON: {e}"),
|
Err(e) => eprintln!("Error serializing to JSON: {e}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Text well under the limit must come back untouched, with no ellipsis.
    #[test]
    fn truncate_snippet_short_text_unchanged() {
        let s = "hello world";
        assert_eq!(truncate_snippet(s, 100), "hello world");
    }

    /// Tag-free text over the limit keeps `max_visible - 3` characters plus `...`.
    #[test]
    fn truncate_snippet_plain_text_truncated() {
        let s = "this is a long string that exceeds the limit";
        let result = truncate_snippet(s, 20);
        // 17 characters of text + 3 for the ellipsis = exactly 20 visible characters.
        assert_eq!(result, "this is a long st...");
        assert_eq!(result.chars().count(), 20, "got: {result}");
    }

    /// A cut that lands after a highlight keeps the whole `<mark>` pair intact.
    #[test]
    fn truncate_snippet_preserves_mark_tags() {
        let s = "some text <mark>keyword</mark> and more text here that is long";
        let result = truncate_snippet(s, 30);
        // Should not cut inside a <mark> pair
        let open_count = result.matches("<mark>").count();
        let close_count = result.matches("</mark>").count();
        assert_eq!(
            open_count, close_count,
            "unbalanced tags in: {result}"
        );
        // 10 + 7 + 10 visible characters, then the ellipsis; tags are not counted.
        assert_eq!(result, "some text <mark>keyword</mark> and more ...");
    }

    /// A cut that lands inside a highlight closes the `<mark>` before the ellipsis.
    #[test]
    fn truncate_snippet_closes_mark_when_cut_inside_highlight() {
        let s = "ab <mark>highlighted</mark> tail text";
        assert_eq!(truncate_snippet(s, 10), "ab <mark>high</mark>...");
    }

    /// A cut that lands before the first highlight drops the tag entirely.
    #[test]
    fn truncate_snippet_cuts_before_mark_tag() {
        let s = "a]very long prefix that exceeds the limit <mark>word</mark>";
        let result = truncate_snippet(s, 15);
        assert!(result.ends_with("..."), "got: {result}");
        // The <mark> tag should not appear since we truncated before reaching it
        assert!(
            !result.contains("<mark>"),
            "should not include tag: {result}"
        );
        assert_eq!(result, "a]very long ...");
    }

    /// Tags must not be charged against the limit.
    #[test]
    fn truncate_snippet_does_not_count_tags_as_visible() {
        // With tags, raw length is 39 chars. Without tags, visible is 26.
        let s = "prefix <mark>keyword</mark> suffix text";
        // If max_visible = 35, the raw text (39 chars) would not fit, but the visible
        // text (26 chars) does — should NOT truncate
        let result = truncate_snippet(s, 35);
        assert_eq!(result, s, "should not truncate when visible text fits");
    }

    /// Limits too small to hold text plus the ellipsis disable truncation.
    #[test]
    fn truncate_snippet_small_limit_returns_as_is() {
        let s = "text <mark>x</mark>";
        // Very small limit should return as-is (guard clause)
        assert_eq!(truncate_snippet(s, 3), s);
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user