1 Commits

Author SHA1 Message Date
teernisse
fa7c44d88c fix(search): collapse newlines in snippets to prevent unindented metadata (GIT-5)
Document content_text includes multi-line metadata (Project:, URL:, Labels:,
State:) separated by newlines. FTS5 snippet() preserves these newlines, causing
subsequent lines to render at column 0 with no indent. collapse_newlines()
flattens all whitespace runs into single spaces before truncation and rendering.

Includes 3 unit tests.
2026-03-12 10:25:39 -04:00

View File

@@ -337,6 +337,27 @@ fn parse_json_array(json: &str) -> Vec<String> {
.collect() .collect()
} }
/// Collapse newlines and runs of whitespace in a snippet into single spaces.
///
/// Document `content_text` includes multi-line metadata (Project:, URL:, Labels:, etc.).
/// FTS5 snippet() preserves these newlines, causing unindented lines when rendered.
///
/// Every maximal run of whitespace becomes exactly one ASCII space. Leading and
/// trailing runs are collapsed to a single space, not trimmed. Whitespace is
/// detected with `char::is_whitespace`, so Unicode line breaks (NEL U+0085,
/// U+2028, U+2029) and the vertical tab are flattened as well as `\n`, `\r`
/// and `\t`; any of them could otherwise still break the rendered line.
fn collapse_newlines(s: &str) -> String {
    // Each whitespace char is at least one byte and is replaced by a one-byte
    // space, so the output never exceeds the input length.
    let mut result = String::with_capacity(s.len());
    let mut prev_was_space = false;
    for c in s.chars() {
        if c.is_whitespace() {
            // Emit a space only for the first character of a whitespace run.
            if !prev_was_space {
                result.push(' ');
            }
            prev_was_space = true;
        } else {
            result.push(c);
            prev_was_space = false;
        }
    }
    result
}
/// Truncate a snippet to `max_visible` visible characters, respecting `<mark>` tag boundaries. /// Truncate a snippet to `max_visible` visible characters, respecting `<mark>` tag boundaries.
/// ///
/// Counts only visible text (not tags) toward the limit, and ensures we never cut /// Counts only visible text (not tags) toward the limit, and ensures we never cut
@@ -499,12 +520,15 @@ pub fn print_search_results(response: &SearchResponse, explain: bool) {
println!("{indent}{}", meta_parts.join(&sep)); println!("{indent}{}", meta_parts.join(&sep));
// Phase 5: limit snippet to ~2 terminal lines. // Phase 5: limit snippet to ~2 terminal lines.
// First collapse newlines — content_text includes multi-line metadata
// (Project:, URL:, Labels:, etc.) that would print at column 0.
let collapsed = collapse_newlines(&result.snippet);
// Truncate based on visible text length (excluding <mark></mark> tags) // Truncate based on visible text length (excluding <mark></mark> tags)
// to avoid cutting inside a highlight tag pair. // to avoid cutting inside a highlight tag pair.
let max_snippet_width = let max_snippet_width =
render::terminal_width().saturating_sub(render::visible_width(&indent)); render::terminal_width().saturating_sub(render::visible_width(&indent));
let max_snippet_chars = max_snippet_width.saturating_mul(2); let max_snippet_chars = max_snippet_width.saturating_mul(2);
let snippet = truncate_snippet(&result.snippet, max_snippet_chars); let snippet = truncate_snippet(&collapsed, max_snippet_chars);
let rendered = render_snippet(&snippet); let rendered = render_snippet(&snippet);
println!("{indent}{rendered}"); println!("{indent}{rendered}");
@@ -619,10 +643,7 @@ mod tests {
// Should not cut inside a <mark> pair // Should not cut inside a <mark> pair
let open_count = result.matches("<mark>").count(); let open_count = result.matches("<mark>").count();
let close_count = result.matches("</mark>").count(); let close_count = result.matches("</mark>").count();
assert_eq!( assert_eq!(open_count, close_count, "unbalanced tags in: {result}");
open_count, close_count,
"unbalanced tags in: {result}"
);
} }
#[test] #[test]
@@ -652,4 +673,32 @@ mod tests {
// Very small limit should return as-is (guard clause) // Very small limit should return as-is (guard clause)
assert_eq!(truncate_snippet(s, 3), s); assert_eq!(truncate_snippet(s, 3), s);
} }
#[test]
fn collapse_newlines_flattens_multiline_metadata() {
    // A document header followed by metadata fields, one per line, as stored
    // in `content_text`.
    let input = "[[Issue]] #4018: Remove math.js\nProject: vs/typescript-code\nURL: https://example.com\nLabels: []";
    // The same text with each newline turned into a single space.
    let expected = "[[Issue]] #4018: Remove math.js Project: vs/typescript-code URL: https://example.com Labels: []";

    let result = collapse_newlines(input);

    assert!(
        !result.contains('\n'),
        "should not contain newlines: {result}"
    );
    assert_eq!(result, expected);
}
#[test]
fn collapse_newlines_preserves_mark_tags() {
    // The highlight tag pair sits on its own line; only the surrounding
    // newlines may change, the tag text itself must come through untouched.
    assert_eq!(
        collapse_newlines("first line\n<mark>keyword</mark>\nsecond line"),
        "first line <mark>keyword</mark> second line"
    );
}
#[test]
fn collapse_newlines_collapses_runs_of_whitespace() {
    // Mixed runs (space + newlines + space, and consecutive tabs) must each
    // shrink to exactly one space.
    let flattened = collapse_newlines("a \n\n b\t\tc");
    assert_eq!(flattened, "a b c");
}
} }