Change collapse_newlines() from is_ascii_whitespace() to is_whitespace() so non-breaking spaces, em-spaces, and other Unicode whitespace characters in search snippets are also collapsed into single spaces. Additionally fix serde_json::to_value() call site to handle serialization errors gracefully instead of unwrapping. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
711 lines
24 KiB
Rust
711 lines
24 KiB
Rust
use std::collections::HashMap;
|
|
|
|
use crate::cli::render::{self, Theme};
|
|
use serde::Serialize;
|
|
|
|
use crate::Config;
|
|
use crate::core::db::create_connection;
|
|
use crate::core::error::{LoreError, Result};
|
|
use crate::core::paths::get_db_path;
|
|
use crate::core::project::resolve_project;
|
|
use crate::core::time::{ms_to_iso, parse_since};
|
|
use crate::documents::SourceType;
|
|
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
|
use crate::search::{
|
|
FtsQueryMode, HybridResult, PathFilter, SearchFilters, SearchMode, get_result_snippet,
|
|
search_fts, search_hybrid,
|
|
};
|
|
|
|
/// One search hit, shaped for both JSON output and human terminal rendering.
#[derive(Debug, Serialize)]
pub struct SearchResultDisplay {
    /// Row id in the `documents` table.
    pub document_id: i64,
    /// Source kind string ("issue", "merge_request", "discussion", "note", ...).
    pub source_type: String,
    /// Issue/MR iid backing this document, when it could be resolved.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_entity_iid: Option<i64>,
    pub title: String,
    pub url: Option<String>,
    pub author: Option<String>,
    /// ISO-8601 timestamps converted from epoch ms via `ms_to_iso`.
    pub created_at: Option<String>,
    pub updated_at: Option<String>,
    /// Raw epoch ms for human rendering; not serialized to JSON.
    #[serde(skip)]
    pub updated_at_ms: Option<i64>,
    pub project_path: String,
    pub labels: Vec<String>,
    pub paths: Vec<String>,
    /// FTS snippet (may contain `<mark>` tags) or a content-text fallback.
    pub snippet: String,
    /// Hybrid relevance score; 0.0 when the document has no hybrid entry.
    pub score: f64,
    /// Ranking breakdown, populated only when --explain is requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explain: Option<ExplainData>,
}
|
|
|
|
/// Per-result ranking breakdown shown in --explain mode.
#[derive(Debug, Serialize)]
pub struct ExplainData {
    /// Rank in the vector (embedding) result list, if the document appeared there.
    pub vector_rank: Option<usize>,
    /// Rank in the FTS result list, if the document appeared there.
    pub fts_rank: Option<usize>,
    /// Combined reciprocal-rank-fusion score.
    pub rrf_score: f64,
}
|
|
|
|
/// Full response for one search invocation, serialized for robot/JSON output
/// and consumed by the human printers below.
#[derive(Debug, Serialize)]
pub struct SearchResponse {
    /// The query string as supplied by the user.
    pub query: String,
    /// The search mode actually used (requested mode may fall back to hybrid).
    pub mode: String,
    pub total_results: usize,
    pub results: Vec<SearchResultDisplay>,
    /// Non-fatal notices (e.g. empty index, degraded mode) surfaced to the user.
    pub warnings: Vec<String>,
}
|
|
|
|
/// Raw filter values collected from CLI flags, parsed/validated in `run_search`.
pub struct SearchCliFilters {
    /// --type value; parsed with `SourceType::parse` (unknown values are dropped).
    pub source_type: Option<String>,
    pub author: Option<String>,
    /// Project path or id; resolved to an internal id via `resolve_project`.
    pub project: Option<String>,
    pub labels: Vec<String>,
    /// Path filter; trailing '/' selects prefix matching, otherwise exact.
    pub path: Option<String>,
    /// Created-since spec: relative (7d, 2w, 1m) or absolute (YYYY-MM-DD).
    pub since: Option<String>,
    /// Updated-since spec, same formats as `since`.
    pub updated_since: Option<String>,
    pub limit: usize,
}
|
|
|
|
/// Execute a search end-to-end: parse CLI filters, run hybrid (vector + FTS)
/// retrieval, hydrate document metadata, and assemble a `SearchResponse`.
///
/// Errors are returned for invalid filter values, DB failures, or search-layer
/// failures; soft conditions (empty index, degraded mode) become `warnings`.
pub async fn run_search(
    config: &Config,
    query: &str,
    cli_filters: SearchCliFilters,
    fts_mode: FtsQueryMode,
    requested_mode: &str,
    explain: bool,
) -> Result<SearchResponse> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    let mut warnings: Vec<String> = Vec::new();

    // Unknown mode strings silently fall back to hybrid rather than erroring.
    let actual_mode = SearchMode::parse(requested_mode).unwrap_or(SearchMode::Hybrid);

    // The embedding client is only needed when vector search participates;
    // pure lexical mode skips Ollama entirely.
    let client = if actual_mode != SearchMode::Lexical {
        let ollama_cfg = &config.embedding;
        Some(OllamaClient::new(OllamaConfig {
            base_url: ollama_cfg.base_url.clone(),
            model: ollama_cfg.model.clone(),
            ..OllamaConfig::default()
        }))
    } else {
        None
    };

    // Cheap guard: an empty index produces a friendly hint instead of zero hits.
    let doc_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
        .unwrap_or(0);

    if doc_count == 0 {
        warnings.push("No documents indexed. Run 'lore generate-docs' first.".to_string());
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: actual_mode.as_str().to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    // Unrecognized --type values fall through to None (no filtering).
    let source_type = cli_filters
        .source_type
        .as_deref()
        .and_then(SourceType::parse);

    let project_id = cli_filters
        .project
        .as_deref()
        .map(|p| resolve_project(&conn, p))
        .transpose()?;

    // Time filters: parse failures are hard errors with a usage hint.
    let since = cli_filters
        .since
        .as_deref()
        .map(|s| {
            parse_since(s).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    s
                ))
            })
        })
        .transpose()?;
    let updated_since = cli_filters
        .updated_since
        .as_deref()
        .map(|s| {
            parse_since(s).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --updated-since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    s
                ))
            })
        })
        .transpose()?;

    // Trailing '/' means "everything under this directory"; otherwise exact match.
    let path = cli_filters.path.as_deref().map(|p| {
        if p.ends_with('/') {
            PathFilter::Prefix(p.to_string())
        } else {
            PathFilter::Exact(p.to_string())
        }
    });

    let filters = SearchFilters {
        source_type,
        author: cli_filters.author,
        project_id,
        since,
        updated_since,
        labels: cli_filters.labels,
        path,
        limit: cli_filters.limit,
    };

    // Run FTS separately for snippet extraction (search_hybrid doesn't return snippets).
    // Over-fetch (10x limit, clamped to 50..=1500) so post-filter hits still have snippets.
    let snippet_top_k = filters
        .clamp_limit()
        .checked_mul(10)
        .unwrap_or(500)
        .clamp(50, 1500);
    let fts_results = search_fts(&conn, query, snippet_top_k, fts_mode)?;
    let snippet_map: HashMap<i64, String> = fts_results
        .iter()
        .map(|r| (r.document_id, r.snippet.clone()))
        .collect();

    // search_hybrid handles recall sizing, RRF ranking, and filter application internally.
    let (hybrid_results, mut hybrid_warnings) = search_hybrid(
        &conn,
        client.as_ref(),
        query,
        actual_mode,
        &filters,
        fts_mode,
    )
    .await?;
    warnings.append(&mut hybrid_warnings);

    if hybrid_results.is_empty() {
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: actual_mode.as_str().to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    // Hydrate metadata in ranked order; hydrate_results preserves input order.
    let ranked_ids: Vec<i64> = hybrid_results.iter().map(|r| r.document_id).collect();
    let hydrated = hydrate_results(&conn, &ranked_ids)?;

    let hybrid_map: HashMap<i64, &HybridResult> =
        hybrid_results.iter().map(|r| (r.document_id, r)).collect();

    let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
    for row in &hydrated {
        let hr = hybrid_map.get(&row.document_id);
        // Prefer the FTS-highlighted snippet; fall back to raw content text.
        let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str());
        let snippet = get_result_snippet(fts_snippet, &row.content_text);

        let explain_data = if explain {
            hr.map(|r| ExplainData {
                vector_rank: r.vector_rank,
                fts_rank: r.fts_rank,
                rrf_score: r.rrf_score,
            })
        } else {
            None
        };

        results.push(SearchResultDisplay {
            document_id: row.document_id,
            source_type: row.source_type.clone(),
            source_entity_iid: row.source_entity_iid,
            title: row.title.clone().unwrap_or_default(),
            url: row.url.clone(),
            author: row.author.clone(),
            created_at: row.created_at.map(ms_to_iso),
            updated_at: row.updated_at.map(ms_to_iso),
            updated_at_ms: row.updated_at,
            project_path: row.project_path.clone(),
            labels: row.labels.clone(),
            paths: row.paths.clone(),
            snippet,
            // Missing hybrid entry (shouldn't normally happen) scores as 0.0.
            score: hr.map(|r| r.score).unwrap_or(0.0),
            explain: explain_data,
        });
    }

    Ok(SearchResponse {
        query: query.to_string(),
        mode: actual_mode.as_str().to_string(),
        total_results: results.len(),
        results,
        warnings,
    })
}
|
|
|
|
/// Intermediate row produced by `hydrate_results`: document metadata joined
/// with project path, labels, paths, and (when resolvable) the source iid.
struct HydratedRow {
    document_id: i64,
    source_type: String,
    /// Issue/MR iid backing this document, resolved per source_type in SQL.
    source_entity_iid: Option<i64>,
    title: Option<String>,
    url: Option<String>,
    author: Option<String>,
    /// Epoch-ms timestamps as stored in the DB.
    created_at: Option<i64>,
    updated_at: Option<i64>,
    /// Full document text; used as snippet fallback when FTS has none.
    content_text: String,
    project_path: String,
    labels: Vec<String>,
    paths: Vec<String>,
}
|
|
|
|
fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<Vec<HydratedRow>> {
|
|
if document_ids.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
let ids_json =
|
|
serde_json::to_string(document_ids).map_err(|e| LoreError::Other(e.to_string()))?;
|
|
|
|
let sql = r#"
|
|
SELECT d.id, d.source_type, d.title, d.url, d.author_username,
|
|
d.created_at, d.updated_at, d.content_text,
|
|
p.path_with_namespace AS project_path,
|
|
(SELECT json_group_array(dl.label_name)
|
|
FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json,
|
|
(SELECT json_group_array(dp.path)
|
|
FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json,
|
|
CASE d.source_type
|
|
WHEN 'issue' THEN
|
|
(SELECT i.iid FROM issues i WHERE i.id = d.source_id)
|
|
WHEN 'merge_request' THEN
|
|
(SELECT m.iid FROM merge_requests m WHERE m.id = d.source_id)
|
|
WHEN 'discussion' THEN
|
|
(SELECT COALESCE(
|
|
(SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
|
|
(SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
|
|
) FROM discussions disc WHERE disc.id = d.source_id)
|
|
WHEN 'note' THEN
|
|
(SELECT COALESCE(
|
|
(SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
|
|
(SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
|
|
) FROM notes n
|
|
JOIN discussions disc ON disc.id = n.discussion_id
|
|
WHERE n.id = d.source_id)
|
|
ELSE NULL
|
|
END AS source_entity_iid
|
|
FROM json_each(?1) AS j
|
|
JOIN documents d ON d.id = j.value
|
|
JOIN projects p ON p.id = d.project_id
|
|
ORDER BY j.key
|
|
"#;
|
|
|
|
let mut stmt = conn.prepare(sql)?;
|
|
let rows = stmt
|
|
.query_map([ids_json], |row| {
|
|
let labels_json: String = row.get(9)?;
|
|
let paths_json: String = row.get(10)?;
|
|
|
|
Ok(HydratedRow {
|
|
document_id: row.get(0)?,
|
|
source_type: row.get(1)?,
|
|
title: row.get(2)?,
|
|
url: row.get(3)?,
|
|
author: row.get(4)?,
|
|
created_at: row.get(5)?,
|
|
updated_at: row.get(6)?,
|
|
content_text: row.get(7)?,
|
|
project_path: row.get(8)?,
|
|
labels: parse_json_array(&labels_json),
|
|
paths: parse_json_array(&paths_json),
|
|
source_entity_iid: row.get(11)?,
|
|
})
|
|
})?
|
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
|
|
|
Ok(rows)
|
|
}
|
|
|
|
fn parse_json_array(json: &str) -> Vec<String> {
|
|
serde_json::from_str::<Vec<serde_json::Value>>(json)
|
|
.unwrap_or_default()
|
|
.into_iter()
|
|
.filter_map(|v| v.as_str().map(|s| s.to_string()))
|
|
.filter(|s| !s.is_empty())
|
|
.collect()
|
|
}
|
|
|
|
/// Collapse newlines and runs of whitespace in a snippet into single spaces.
///
/// Document `content_text` includes multi-line metadata (Project:, URL:, Labels:, etc.).
/// FTS5 snippet() preserves these newlines, causing unindented lines when rendered.
/// Uses `char::is_whitespace`, so Unicode whitespace (NBSP, em-space, ...) is
/// collapsed too; a run at the start of the input becomes a single leading space.
fn collapse_newlines(s: &str) -> String {
    s.chars().fold(String::with_capacity(s.len()), |mut acc, ch| {
        if ch.is_whitespace() {
            // Emit at most one space per whitespace run.
            if !acc.ends_with(' ') {
                acc.push(' ');
            }
        } else {
            acc.push(ch);
        }
        acc
    })
}
|
|
|
|
/// Truncate a snippet to `max_visible` visible characters, respecting `<mark>` tag boundaries.
///
/// Counts only visible text (not tags) toward the limit, and ensures we never cut
/// inside a `<mark>...</mark>` pair (which would break `render_snippet` highlighting).
/// When truncation happens, "..." is appended and counts toward `max_visible`
/// (the visible budget is `max_visible - 3`). Limits below 4 return the input
/// unchanged — there is no room for content plus an ellipsis.
fn truncate_snippet(snippet: &str, max_visible: usize) -> String {
    if max_visible < 4 {
        return snippet.to_string();
    }

    let mut visible_count = 0;
    let mut result = String::new();
    let mut remaining = snippet;

    // Walk the snippet tag-pair by tag-pair, tracking visible chars consumed.
    while !remaining.is_empty() {
        if let Some(start) = remaining.find("<mark>") {
            // Count visible chars before the tag
            let before = &remaining[..start];
            let before_len = before.chars().count();
            if visible_count + before_len >= max_visible.saturating_sub(3) {
                // Truncate within the pre-tag text
                let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
                let truncated: String = before.chars().take(take).collect();
                result.push_str(&truncated);
                result.push_str("...");
                return result;
            }
            result.push_str(before);
            visible_count += before_len;

            // Find matching </mark>
            let after_open = &remaining[start + 6..];
            if let Some(end) = after_open.find("</mark>") {
                let highlighted = &after_open[..end];
                let hl_len = highlighted.chars().count();
                if visible_count + hl_len >= max_visible.saturating_sub(3) {
                    // Truncate within the highlighted text, keeping the pair balanced.
                    let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
                    let truncated: String = highlighted.chars().take(take).collect();
                    result.push_str("<mark>");
                    result.push_str(&truncated);
                    result.push_str("</mark>...");
                    return result;
                }
                // Whole pair fits: copy tags + highlighted text verbatim.
                result.push_str(&remaining[start..start + 6 + end + 7]);
                visible_count += hl_len;
                remaining = &after_open[end + 7..];
            } else {
                // Unclosed <mark> — treat rest as plain text
                result.push_str(&remaining[start..]);
                break;
            }
        } else {
            // No more tags — handle remaining plain text
            let rest_len = remaining.chars().count();
            if visible_count + rest_len > max_visible && max_visible > 3 {
                let take = max_visible.saturating_sub(3).saturating_sub(visible_count);
                let truncated: String = remaining.chars().take(take).collect();
                result.push_str(&truncated);
                result.push_str("...");
                return result;
            }
            result.push_str(remaining);
            break;
        }
    }

    result
}
|
|
|
|
/// Render FTS snippet with `<mark>` tags as terminal highlight style.
|
|
fn render_snippet(snippet: &str) -> String {
|
|
let mut result = String::new();
|
|
let mut remaining = snippet;
|
|
while let Some(start) = remaining.find("<mark>") {
|
|
result.push_str(&Theme::muted().render(&remaining[..start]));
|
|
remaining = &remaining[start + 6..];
|
|
if let Some(end) = remaining.find("</mark>") {
|
|
let highlighted = &remaining[..end];
|
|
result.push_str(&Theme::highlight().render(highlighted));
|
|
remaining = &remaining[end + 7..];
|
|
}
|
|
}
|
|
result.push_str(&Theme::muted().render(remaining));
|
|
result
|
|
}
|
|
|
|
/// Render a `SearchResponse` for human consumption on the terminal.
///
/// Layout per result: rank + type badge + entity ref + title + relative time,
/// then a project/author metadata line, a ~2-line snippet, and (with
/// `explain`) a ranking-breakdown line including labels. Warnings go to
/// stderr; everything else to stdout.
pub fn print_search_results(response: &SearchResponse, explain: bool) {
    if !response.warnings.is_empty() {
        for w in &response.warnings {
            eprintln!("{} {}", Theme::warning().render("Warning:"), w);
        }
    }

    if response.results.is_empty() {
        println!(
            "No results found for '{}'",
            Theme::bold().render(&response.query)
        );
        return;
    }

    // Phase 6: section divider header
    println!(
        "{}",
        render::section_divider(&format!(
            "{} results for '{}' {}",
            response.total_results, response.query, response.mode
        ))
    );

    for (i, result) in response.results.iter().enumerate() {
        println!();

        // Fixed-width badges keep titles vertically aligned across types.
        let type_badge = match result.source_type.as_str() {
            "issue" => Theme::issue_ref().render("issue"),
            "merge_request" => Theme::mr_ref().render(" mr "),
            "discussion" => Theme::info().render(" disc"),
            "note" => Theme::muted().render(" note"),
            _ => Theme::muted().render(&format!("{:>5}", &result.source_type)),
        };

        // Phase 1: entity ref (e.g. #42 or !99)
        let entity_ref = result
            .source_entity_iid
            .map(|iid| match result.source_type.as_str() {
                "issue" | "discussion" | "note" => Theme::issue_ref().render(&format!("#{iid}")),
                "merge_request" => Theme::mr_ref().render(&format!("!{iid}")),
                _ => String::new(),
            });

        // Phase 3: relative time
        let time_str = result
            .updated_at_ms
            .map(|ms| Theme::dim().render(&render::format_relative_time_compact(ms)));

        // Phase 2: build prefix, compute indent from its visible width
        // (visible_width ignores ANSI escapes in the badge).
        let prefix = format!("  {:>3}. {} ", i + 1, type_badge);
        let indent = " ".repeat(render::visible_width(&prefix));

        // Title line: rank, type badge, entity ref, title, relative time
        let mut title_line = prefix;
        if let Some(ref eref) = entity_ref {
            title_line.push_str(eref);
            title_line.push_str(" ");
        }
        title_line.push_str(&Theme::bold().render(&result.title));
        if let Some(ref time) = time_str {
            title_line.push_str("  ");
            title_line.push_str(time);
        }
        println!("{title_line}");

        // Metadata: project, author — compact middle-dot line
        let sep = Theme::muted().render(" \u{b7} ");
        let mut meta_parts: Vec<String> = Vec::new();
        meta_parts.push(Theme::muted().render(&result.project_path));
        if let Some(ref author) = result.author {
            meta_parts.push(Theme::username().render(&format!("@{author}")));
        }
        println!("{indent}{}", meta_parts.join(&sep));

        // Phase 5: limit snippet to ~2 terminal lines.
        // First collapse newlines — content_text includes multi-line metadata
        // (Project:, URL:, Labels:, etc.) that would print at column 0.
        let collapsed = collapse_newlines(&result.snippet);
        // Truncate based on visible text length (excluding <mark></mark> tags)
        // to avoid cutting inside a highlight tag pair.
        let max_snippet_width =
            render::terminal_width().saturating_sub(render::visible_width(&indent));
        let max_snippet_chars = max_snippet_width.saturating_mul(2);
        let snippet = truncate_snippet(&collapsed, max_snippet_chars);
        let rendered = render_snippet(&snippet);
        println!("{indent}{rendered}");

        if let Some(ref explain_data) = result.explain {
            // "-" marks absence from that ranking source.
            let mut explain_line = format!(
                "{indent}{} vec={} fts={} rrf={:.4}",
                Theme::accent().render("explain"),
                explain_data
                    .vector_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain_data
                    .fts_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain_data.rrf_score
            );
            // Phase 5: labels shown only in explain mode
            if explain && !result.labels.is_empty() {
                let label_str = if result.labels.len() <= 3 {
                    result.labels.join(", ")
                } else {
                    // Show first two labels plus a "+N" overflow count.
                    format!(
                        "{} +{}",
                        result.labels[..2].join(", "),
                        result.labels.len() - 2
                    )
                };
                explain_line.push_str(&format!("  {}", Theme::muted().render(&label_str)));
            }
            println!("{explain_line}");
        }
    }

    // Phase 4: drill-down hint footer
    if let Some(first) = response.results.first()
        && let Some(iid) = first.source_entity_iid
    {
        let cmd = match first.source_type.as_str() {
            "issue" | "discussion" | "note" => Some(format!("lore issues {iid}")),
            "merge_request" => Some(format!("lore mrs {iid}")),
            _ => None,
        };
        if let Some(cmd) = cmd {
            println!(
                "\n  {} {}",
                Theme::dim().render("Tip:"),
                Theme::dim().render(&format!("{cmd} for details"))
            );
        }
    }

    println!();
}
|
|
|
|
/// JSON envelope for robot output: `{ ok, data, meta }`.
#[derive(Serialize)]
struct SearchJsonOutput<'a> {
    ok: bool,
    /// Borrowed response payload; serialized in place.
    data: &'a SearchResponse,
    meta: SearchMeta,
}
|
|
|
|
/// Timing metadata attached to the JSON envelope.
#[derive(Serialize)]
struct SearchMeta {
    /// Wall-clock duration of the search, in milliseconds.
    elapsed_ms: u64,
}
|
|
|
|
pub fn print_search_results_json(
|
|
response: &SearchResponse,
|
|
elapsed_ms: u64,
|
|
fields: Option<&[String]>,
|
|
) {
|
|
let output = SearchJsonOutput {
|
|
ok: true,
|
|
data: response,
|
|
meta: SearchMeta { elapsed_ms },
|
|
};
|
|
let mut value = match serde_json::to_value(&output) {
|
|
Ok(v) => v,
|
|
Err(e) => {
|
|
eprintln!("Error serializing search response: {e}");
|
|
return;
|
|
}
|
|
};
|
|
if let Some(f) = fields {
|
|
let expanded = crate::cli::robot::expand_fields_preset(f, "search");
|
|
crate::cli::robot::filter_fields(&mut value, "results", &expanded);
|
|
}
|
|
match serde_json::to_string(&value) {
|
|
Ok(json) => println!("{json}"),
|
|
Err(e) => eprintln!("Error serializing to JSON: {e}"),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    //! Unit tests for the pure snippet helpers: `truncate_snippet` (visible-char
    //! budget, `<mark>` tag-pair safety) and `collapse_newlines` (whitespace
    //! flattening). The DB/render paths are not covered here.
    use super::*;

    #[test]
    fn truncate_snippet_short_text_unchanged() {
        let s = "hello world";
        assert_eq!(truncate_snippet(s, 100), "hello world");
    }

    #[test]
    fn truncate_snippet_plain_text_truncated() {
        let s = "this is a long string that exceeds the limit";
        let result = truncate_snippet(s, 20);
        assert!(result.ends_with("..."), "got: {result}");
        // Visible chars should be <= 20
        assert!(result.chars().count() <= 20, "got: {result}");
    }

    #[test]
    fn truncate_snippet_preserves_mark_tags() {
        let s = "some text <mark>keyword</mark> and more text here that is long";
        let result = truncate_snippet(s, 30);
        // Should not cut inside a <mark> pair
        let open_count = result.matches("<mark>").count();
        let close_count = result.matches("</mark>").count();
        assert_eq!(open_count, close_count, "unbalanced tags in: {result}");
    }

    #[test]
    fn truncate_snippet_cuts_before_mark_tag() {
        let s = "a]very long prefix that exceeds the limit <mark>word</mark>";
        let result = truncate_snippet(s, 15);
        assert!(result.ends_with("..."), "got: {result}");
        // The <mark> tag should not appear since we truncated before reaching it
        assert!(
            !result.contains("<mark>"),
            "should not include tag: {result}"
        );
    }

    #[test]
    fn truncate_snippet_does_not_count_tags_as_visible() {
        // With tags, raw length is 42 chars. Without tags, visible is 29.
        let s = "prefix <mark>keyword</mark> suffix text";
        // If max_visible = 35, the visible text (29 chars) fits — should NOT truncate
        let result = truncate_snippet(s, 35);
        assert_eq!(result, s, "should not truncate when visible text fits");
    }

    #[test]
    fn truncate_snippet_small_limit_returns_as_is() {
        let s = "text <mark>x</mark>";
        // Very small limit should return as-is (guard clause)
        assert_eq!(truncate_snippet(s, 3), s);
    }

    #[test]
    fn collapse_newlines_flattens_multiline_metadata() {
        // Mirrors real content_text: title line followed by metadata lines.
        let s = "[[Issue]] #4018: Remove math.js\nProject: vs/typescript-code\nURL: https://example.com\nLabels: []";
        let result = collapse_newlines(s);
        assert!(
            !result.contains('\n'),
            "should not contain newlines: {result}"
        );
        assert_eq!(
            result,
            "[[Issue]] #4018: Remove math.js Project: vs/typescript-code URL: https://example.com Labels: []"
        );
    }

    #[test]
    fn collapse_newlines_preserves_mark_tags() {
        let s = "first line\n<mark>keyword</mark>\nsecond line";
        let result = collapse_newlines(s);
        assert_eq!(result, "first line <mark>keyword</mark> second line");
    }

    #[test]
    fn collapse_newlines_collapses_runs_of_whitespace() {
        // Mixed runs (newlines, spaces, tabs) each collapse to one space.
        let s = "a \n\n b\t\tc";
        let result = collapse_newlines(s);
        assert_eq!(result, "a b c");
    }
}
|