gitlore/src/cli/commands/search.rs

use std::collections::HashMap;

use crate::cli::render::{self, Theme};
use serde::Serialize;

use crate::Config;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use crate::documents::SourceType;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::search::{
    FtsQueryMode, HybridResult, PathFilter, SearchFilters, SearchMode, get_result_snippet,
    search_fts, search_hybrid,
};

/// One search hit, shaped for both terminal rendering and JSON serialization.
///
/// Built by `run_search` from a hydrated document row combined with the
/// ranking data for the same document id.
#[derive(Debug, Serialize)]
pub struct SearchResultDisplay {
    /// Id of the matching row in the `documents` table.
    pub document_id: i64,
    /// Value of `documents.source_type`; the renderer special-cases
    /// `"issue"`, `"merge_request"`, `"discussion"` and `"note"`.
    pub source_type: String,
    /// IID of the issue or merge request the document is, or belongs to.
    /// Omitted from JSON when it could not be resolved.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_entity_iid: Option<i64>,
    /// Document title; an empty string when the document has none.
    pub title: String,
    /// Value of `documents.url`, if any.
    pub url: Option<String>,
    /// Value of `documents.author_username`, if any.
    pub author: Option<String>,
    /// Creation timestamp formatted by `ms_to_iso`.
    pub created_at: Option<String>,
    /// Last-update timestamp formatted by `ms_to_iso`.
    pub updated_at: Option<String>,
    /// Raw epoch ms for human rendering; not serialized to JSON.
    #[serde(skip)]
    pub updated_at_ms: Option<i64>,
    /// `path_with_namespace` of the owning project.
    pub project_path: String,
    /// Non-empty label names attached to the document.
    pub labels: Vec<String>,
    /// Non-empty file paths attached to the document.
    pub paths: Vec<String>,
    /// Snippet produced by `get_result_snippet` from the FTS snippet (when the
    /// document appeared in the FTS pass) and the document's content text.
    pub snippet: String,
    /// `score` of the matching `HybridResult`; `0.0` if none was found.
    pub score: f64,
    /// Ranking details; only populated when explain output was requested,
    /// and omitted from JSON otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explain: Option<ExplainData>,
}

/// Ranking details attached to a result when explain output is requested.
///
/// All values are copied verbatim from the corresponding `HybridResult`.
#[derive(Debug, Serialize)]
pub struct ExplainData {
    /// `HybridResult::vector_rank` (presumably the position in the vector-search
    /// list — confirm against `search_hybrid`); rendered as `-` when `None`.
    pub vector_rank: Option<usize>,
    /// `HybridResult::fts_rank` (presumably the position in the full-text
    /// list — confirm against `search_hybrid`); rendered as `-` when `None`.
    pub fts_rank: Option<usize>,
    /// `HybridResult::rrf_score`; printed with four decimal places.
    pub rrf_score: f64,
}

/// Complete outcome of one `run_search` call.
#[derive(Debug, Serialize)]
pub struct SearchResponse {
    /// The query string exactly as it was passed to `run_search`.
    pub query: String,
    /// `SearchMode::as_str()` of the mode that was actually used, which is the
    /// hybrid mode whenever the requested mode could not be parsed.
    pub mode: String,
    /// Number of entries in `results`.
    pub total_results: usize,
    /// Hits in the order returned by result hydration.
    pub results: Vec<SearchResultDisplay>,
    /// Non-fatal messages collected during the search; the human renderer
    /// prints them to stderr.
    pub warnings: Vec<String>,
}

/// Filter values as received from the command line, before validation.
///
/// `run_search` converts these into a `SearchFilters` value.
pub struct SearchCliFilters {
    /// Source type name, parsed with `SourceType::parse`; a value that does
    /// not parse results in no source-type filter.
    pub source_type: Option<String>,
    /// Author filter, forwarded unchanged.
    pub author: Option<String>,
    /// Project reference, resolved to a project id with `resolve_project`.
    pub project: Option<String>,
    /// Label filters, forwarded unchanged.
    pub labels: Vec<String>,
    /// Path filter: a trailing `/` selects prefix matching, anything else
    /// selects exact matching.
    pub path: Option<String>,
    /// `--since` value, parsed with `parse_since`; an unparsable value is an error.
    pub since: Option<String>,
    /// `--updated-since` value, parsed with `parse_since`; an unparsable value is an error.
    pub updated_since: Option<String>,
    /// Requested result limit, forwarded as `SearchFilters::limit`.
    pub limit: usize,
}

pub async fn run_search(
    config: &Config,
    query: &str,
    cli_filters: SearchCliFilters,
    fts_mode: FtsQueryMode,
    requested_mode: &str,
    explain: bool,
) -> Result<SearchResponse> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    let mut warnings: Vec<String> = Vec::new();

    let actual_mode = SearchMode::parse(requested_mode).unwrap_or(SearchMode::Hybrid);

    let client = if actual_mode != SearchMode::Lexical {
        let ollama_cfg = &config.embedding;
        Some(OllamaClient::new(OllamaConfig {
            base_url: ollama_cfg.base_url.clone(),
            model: ollama_cfg.model.clone(),
            ..OllamaConfig::default()
        }))
    } else {
        None
    };

    let doc_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
        .unwrap_or(0);

    if doc_count == 0 {
        warnings.push("No documents indexed. Run 'lore generate-docs' first.".to_string());
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: actual_mode.as_str().to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    let source_type = cli_filters
        .source_type
        .as_deref()
        .and_then(SourceType::parse);

    let project_id = cli_filters
        .project
        .as_deref()
        .map(|p| resolve_project(&conn, p))
        .transpose()?;

    let since = cli_filters
        .since
        .as_deref()
        .map(|s| {
            parse_since(s).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    s
                ))
            })
        })
        .transpose()?;
    let updated_since = cli_filters
        .updated_since
        .as_deref()
        .map(|s| {
            parse_since(s).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --updated-since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    s
                ))
            })
        })
        .transpose()?;

    let path = cli_filters.path.as_deref().map(|p| {
        if p.ends_with('/') {
            PathFilter::Prefix(p.to_string())
        } else {
            PathFilter::Exact(p.to_string())
        }
    });

    let filters = SearchFilters {
        source_type,
        author: cli_filters.author,
        project_id,
        since,
        updated_since,
        labels: cli_filters.labels,
        path,
        limit: cli_filters.limit,
    };

    // Run FTS separately for snippet extraction (search_hybrid doesn't return snippets).
    let snippet_top_k = filters
        .clamp_limit()
        .checked_mul(10)
        .unwrap_or(500)
        .clamp(50, 1500);
    let fts_results = search_fts(&conn, query, snippet_top_k, fts_mode)?;
    let snippet_map: HashMap<i64, String> = fts_results
        .iter()
        .map(|r| (r.document_id, r.snippet.clone()))
        .collect();

    // search_hybrid handles recall sizing, RRF ranking, and filter application internally.
    let (hybrid_results, mut hybrid_warnings) = search_hybrid(
        &conn,
        client.as_ref(),
        query,
        actual_mode,
        &filters,
        fts_mode,
    )
    .await?;
    warnings.append(&mut hybrid_warnings);

    if hybrid_results.is_empty() {
        return Ok(SearchResponse {
            query: query.to_string(),
            mode: actual_mode.as_str().to_string(),
            total_results: 0,
            results: vec![],
            warnings,
        });
    }

    let ranked_ids: Vec<i64> = hybrid_results.iter().map(|r| r.document_id).collect();
    let hydrated = hydrate_results(&conn, &ranked_ids)?;

    let hybrid_map: HashMap<i64, &HybridResult> =
        hybrid_results.iter().map(|r| (r.document_id, r)).collect();

    let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
    for row in &hydrated {
        let hr = hybrid_map.get(&row.document_id);
        let fts_snippet = snippet_map.get(&row.document_id).map(|s| s.as_str());
        let snippet = get_result_snippet(fts_snippet, &row.content_text);

        let explain_data = if explain {
            hr.map(|r| ExplainData {
                vector_rank: r.vector_rank,
                fts_rank: r.fts_rank,
                rrf_score: r.rrf_score,
            })
        } else {
            None
        };

        results.push(SearchResultDisplay {
            document_id: row.document_id,
            source_type: row.source_type.clone(),
            source_entity_iid: row.source_entity_iid,
            title: row.title.clone().unwrap_or_default(),
            url: row.url.clone(),
            author: row.author.clone(),
            created_at: row.created_at.map(ms_to_iso),
            updated_at: row.updated_at.map(ms_to_iso),
            updated_at_ms: row.updated_at,
            project_path: row.project_path.clone(),
            labels: row.labels.clone(),
            paths: row.paths.clone(),
            snippet,
            score: hr.map(|r| r.score).unwrap_or(0.0),
            explain: explain_data,
        });
    }

    Ok(SearchResponse {
        query: query.to_string(),
        mode: actual_mode.as_str().to_string(),
        total_results: results.len(),
        results,
        warnings,
    })
}

/// One row produced by `hydrate_results`: a document joined with its project,
/// labels, paths and owning entity IID.
struct HydratedRow {
    /// `documents.id`.
    document_id: i64,
    /// `documents.source_type`.
    source_type: String,
    /// IID of the issue / merge request resolved from `documents.source_id`
    /// (via the parent discussion for discussions and notes).
    source_entity_iid: Option<i64>,
    /// `documents.title`.
    title: Option<String>,
    /// `documents.url`.
    url: Option<String>,
    /// `documents.author_username`.
    author: Option<String>,
    /// `documents.created_at`; later formatted with `ms_to_iso`.
    created_at: Option<i64>,
    /// `documents.updated_at`; later formatted with `ms_to_iso`.
    updated_at: Option<i64>,
    /// `documents.content_text`; handed to `get_result_snippet`.
    content_text: String,
    /// `projects.path_with_namespace`.
    project_path: String,
    /// Non-empty label names from `document_labels`.
    labels: Vec<String>,
    /// Non-empty paths from `document_paths`.
    paths: Vec<String>,
}

/// Loads display data for `document_ids`, returned in the order the ids were given.
///
/// The ids are passed to SQLite as one JSON array and expanded with
/// `json_each`; ordering by the array key keeps the caller's ranking intact.
/// Ids without a matching document or project row are absent from the result
/// because both joins are inner joins.
///
/// # Errors
///
/// Fails when the ids cannot be serialized, the statement cannot be prepared,
/// or a column cannot be read as the expected type.
fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<Vec<HydratedRow>> {
    const HYDRATE_SQL: &str = r#"
        SELECT d.id, d.source_type, d.title, d.url, d.author_username,
               d.created_at, d.updated_at, d.content_text,
               p.path_with_namespace AS project_path,
               (SELECT json_group_array(dl.label_name)
                FROM document_labels dl WHERE dl.document_id = d.id) AS labels_json,
               (SELECT json_group_array(dp.path)
                FROM document_paths dp WHERE dp.document_id = d.id) AS paths_json,
               CASE d.source_type
                 WHEN 'issue' THEN
                   (SELECT i.iid FROM issues i WHERE i.id = d.source_id)
                 WHEN 'merge_request' THEN
                   (SELECT m.iid FROM merge_requests m WHERE m.id = d.source_id)
                 WHEN 'discussion' THEN
                   (SELECT COALESCE(
                     (SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
                     (SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
                   ) FROM discussions disc WHERE disc.id = d.source_id)
                 WHEN 'note' THEN
                   (SELECT COALESCE(
                     (SELECT i.iid FROM issues i WHERE i.id = disc.issue_id),
                     (SELECT m.iid FROM merge_requests m WHERE m.id = disc.merge_request_id)
                   ) FROM notes n
                   JOIN discussions disc ON disc.id = n.discussion_id
                   WHERE n.id = d.source_id)
                 ELSE NULL
               END AS source_entity_iid
        FROM json_each(?1) AS j
        JOIN documents d ON d.id = j.value
        JOIN projects p ON p.id = d.project_id
        ORDER BY j.key
    "#;

    // Nothing to look up: skip the round trip to the database entirely.
    if document_ids.is_empty() {
        return Ok(vec![]);
    }

    let encoded_ids = match serde_json::to_string(document_ids) {
        Ok(json) => json,
        Err(e) => return Err(LoreError::Other(e.to_string())),
    };

    let mut stmt = conn.prepare(HYDRATE_SQL)?;
    let mapped = stmt.query_map([encoded_ids], |row| {
        // Columns 9 and 10 are the JSON arrays built by json_group_array.
        let labels_json: String = row.get(9)?;
        let paths_json: String = row.get(10)?;

        Ok(HydratedRow {
            document_id: row.get(0)?,
            source_type: row.get(1)?,
            title: row.get(2)?,
            url: row.get(3)?,
            author: row.get(4)?,
            created_at: row.get(5)?,
            updated_at: row.get(6)?,
            content_text: row.get(7)?,
            project_path: row.get(8)?,
            labels: parse_json_array(&labels_json),
            paths: parse_json_array(&paths_json),
            source_entity_iid: row.get(11)?,
        })
    })?;

    let mut hydrated = Vec::new();
    for row in mapped {
        hydrated.push(row?);
    }
    Ok(hydrated)
}

/// Extracts the non-empty string elements of a JSON array.
///
/// Input that is not a JSON array yields an empty vector; elements that are
/// not strings (for example `null`) and empty strings are skipped.
fn parse_json_array(json: &str) -> Vec<String> {
    let values: Vec<serde_json::Value> = match serde_json::from_str(json) {
        Ok(parsed) => parsed,
        Err(_) => return Vec::new(),
    };

    let mut strings = Vec::new();
    for value in values {
        if let serde_json::Value::String(text) = value {
            if !text.is_empty() {
                strings.push(text);
            }
        }
    }
    strings
}

/// Replace every run of whitespace (newlines included) with one ASCII space.
///
/// Leading and trailing runs are collapsed as well, not trimmed. This keeps
/// multi-line snippet text (metadata lines such as `Project:` / `URL:`) on one
/// logical line so it can be indented as a unit when printed.
fn collapse_newlines(s: &str) -> String {
    let mut flattened = String::with_capacity(s.len());
    // True while the most recently seen character was whitespace.
    let mut inside_gap = false;
    for ch in s.chars() {
        let is_gap = ch.is_whitespace();
        if !is_gap {
            flattened.push(ch);
        } else if !inside_gap {
            // First whitespace character of a run: emit the single separator.
            flattened.push(' ');
        }
        inside_gap = is_gap;
    }
    flattened
}

/// Truncate a snippet to at most `max_visible` visible characters, keeping
/// every `<mark>...</mark>` pair balanced.
///
/// Only text outside the tags' own markup counts toward the limit. A snippet
/// whose visible text already fits is returned unchanged. Otherwise the first
/// `max_visible - 3` visible characters are kept and `...` is appended; a
/// highlight that is cut keeps its closing tag, and a highlight that would be
/// cut down to nothing is dropped rather than emitted as an empty pair.
///
/// An opening `<mark>` without a matching `</mark>` is not a highlight: it and
/// everything after it are treated as plain text and count toward the limit.
///
/// When `max_visible` is below 4 there is no room for a character plus the
/// ellipsis, so the snippet is returned as-is.
fn truncate_snippet(snippet: &str, max_visible: usize) -> String {
    const OPEN: &str = "<mark>";
    const CLOSE: &str = "</mark>";
    const ELLIPSIS: &str = "...";

    if max_visible <= ELLIPSIS.len() {
        return snippet.to_string();
    }

    // Split into (text, is_highlighted) segments, in order of appearance.
    let mut segments: Vec<(&str, bool)> = Vec::new();
    let mut rest = snippet;
    while let Some(open_at) = rest.find(OPEN) {
        let inner = &rest[open_at + OPEN.len()..];
        match inner.find(CLOSE) {
            Some(close_at) => {
                if open_at > 0 {
                    segments.push((&rest[..open_at], false));
                }
                segments.push((&inner[..close_at], true));
                rest = &inner[close_at + CLOSE.len()..];
            }
            // Unclosed tag: leave `rest` untouched so it is handled as plain text.
            None => break,
        }
    }
    if !rest.is_empty() {
        segments.push((rest, false));
    }

    // Decide up front whether anything has to be cut, so that text which fits
    // is never shortened or decorated with an ellipsis.
    let total_visible: usize = segments
        .iter()
        .map(|(text, _)| text.chars().count())
        .sum();
    if total_visible <= max_visible {
        return snippet.to_string();
    }

    let mut budget = max_visible - ELLIPSIS.len();
    let mut out = String::with_capacity(snippet.len());
    for (text, highlighted) in segments {
        if budget == 0 {
            break;
        }
        // Cut on a character boundary; `nth(budget)` is the first char that no longer fits.
        let kept = match text.char_indices().nth(budget) {
            Some((cut, _)) => {
                budget = 0;
                &text[..cut]
            }
            None => {
                budget -= text.chars().count();
                text
            }
        };
        if highlighted {
            out.push_str(OPEN);
            out.push_str(kept);
            out.push_str(CLOSE);
        } else {
            out.push_str(kept);
        }
    }
    out.push_str(ELLIPSIS);
    out
}

/// Convert a snippet containing `<mark>` tags into themed terminal text.
///
/// Text outside the tags is rendered with the muted theme and text inside a
/// `<mark>...</mark>` pair with the highlight theme. An opening tag that has
/// no closing tag is dropped and the text after it is rendered as muted.
fn render_snippet(snippet: &str) -> String {
    const OPEN: &str = "<mark>";
    const CLOSE: &str = "</mark>";

    let mut styled = String::new();
    let mut tail = snippet;
    loop {
        let open_at = match tail.find(OPEN) {
            Some(idx) => idx,
            None => break,
        };
        let (plain, from_tag) = tail.split_at(open_at);
        styled.push_str(&Theme::muted().render(plain));
        tail = &from_tag[OPEN.len()..];

        if let Some(close_at) = tail.find(CLOSE) {
            styled.push_str(&Theme::highlight().render(&tail[..close_at]));
            tail = &tail[close_at + CLOSE.len()..];
        }
    }
    styled.push_str(&Theme::muted().render(tail));
    styled
}

pub fn print_search_results(response: &SearchResponse, explain: bool) {
    if !response.warnings.is_empty() {
        for w in &response.warnings {
            eprintln!("{} {}", Theme::warning().render("Warning:"), w);
        }
    }

    if response.results.is_empty() {
        println!(
            "No results found for '{}'",
            Theme::bold().render(&response.query)
        );
        return;
    }

    // Phase 6: section divider header
    println!(
        "{}",
        render::section_divider(&format!(
            "{} results for '{}'  {}",
            response.total_results, response.query, response.mode
        ))
    );

    for (i, result) in response.results.iter().enumerate() {
        println!();

        let type_badge = match result.source_type.as_str() {
            "issue" => Theme::issue_ref().render("issue"),
            "merge_request" => Theme::mr_ref().render("  mr "),
            "discussion" => Theme::info().render(" disc"),
            "note" => Theme::muted().render(" note"),
            _ => Theme::muted().render(&format!("{:>5}", &result.source_type)),
        };

        // Phase 1: entity ref (e.g. #42 or !99)
        let entity_ref = result
            .source_entity_iid
            .map(|iid| match result.source_type.as_str() {
                "issue" | "discussion" | "note" => Theme::issue_ref().render(&format!("#{iid}")),
                "merge_request" => Theme::mr_ref().render(&format!("!{iid}")),
                _ => String::new(),
            });

        // Phase 3: relative time
        let time_str = result
            .updated_at_ms
            .map(|ms| Theme::dim().render(&render::format_relative_time_compact(ms)));

        // Phase 2: build prefix, compute indent from its visible width
        let prefix = format!("  {:>3}.  {}  ", i + 1, type_badge);
        let indent = " ".repeat(render::visible_width(&prefix));

        // Title line: rank, type badge, entity ref, title, relative time
        let mut title_line = prefix;
        if let Some(ref eref) = entity_ref {
            title_line.push_str(eref);
            title_line.push_str("  ");
        }
        title_line.push_str(&Theme::bold().render(&result.title));
        if let Some(ref time) = time_str {
            title_line.push_str("  ");
            title_line.push_str(time);
        }
        println!("{title_line}");

        // Metadata: project, author — compact middle-dot line
        let sep = Theme::muted().render(" \u{b7} ");
        let mut meta_parts: Vec<String> = Vec::new();
        meta_parts.push(Theme::muted().render(&result.project_path));
        if let Some(ref author) = result.author {
            meta_parts.push(Theme::username().render(&format!("@{author}")));
        }
        println!("{indent}{}", meta_parts.join(&sep));

        // Phase 5: limit snippet to ~2 terminal lines.
        // First collapse newlines — content_text includes multi-line metadata
        // (Project:, URL:, Labels:, etc.) that would print at column 0.
        let collapsed = collapse_newlines(&result.snippet);
        // Truncate based on visible text length (excluding <mark></mark> tags)
        // to avoid cutting inside a highlight tag pair.
        let max_snippet_width =
            render::terminal_width().saturating_sub(render::visible_width(&indent));
        let max_snippet_chars = max_snippet_width.saturating_mul(2);
        let snippet = truncate_snippet(&collapsed, max_snippet_chars);
        let rendered = render_snippet(&snippet);
        println!("{indent}{rendered}");

        if let Some(ref explain_data) = result.explain {
            let mut explain_line = format!(
                "{indent}{} vec={} fts={} rrf={:.4}",
                Theme::accent().render("explain"),
                explain_data
                    .vector_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain_data
                    .fts_rank
                    .map(|r| r.to_string())
                    .unwrap_or_else(|| "-".into()),
                explain_data.rrf_score
            );
            // Phase 5: labels shown only in explain mode
            if explain && !result.labels.is_empty() {
                let label_str = if result.labels.len() <= 3 {
                    result.labels.join(", ")
                } else {
                    format!(
                        "{} +{}",
                        result.labels[..2].join(", "),
                        result.labels.len() - 2
                    )
                };
                explain_line.push_str(&format!("  {}", Theme::muted().render(&label_str)));
            }
            println!("{explain_line}");
        }
    }

    // Phase 4: drill-down hint footer
    if let Some(first) = response.results.first()
        && let Some(iid) = first.source_entity_iid
    {
        let cmd = match first.source_type.as_str() {
            "issue" | "discussion" | "note" => Some(format!("lore issues {iid}")),
            "merge_request" => Some(format!("lore mrs {iid}")),
            _ => None,
        };
        if let Some(cmd) = cmd {
            println!(
                "\n  {} {}",
                Theme::dim().render("Tip:"),
                Theme::dim().render(&format!("{cmd} for details"))
            );
        }
    }

    println!();
}

/// Top-level JSON envelope written by `print_search_results_json`.
#[derive(Serialize)]
struct SearchJsonOutput<'a> {
    /// Success flag; this printer always sets it to `true`.
    ok: bool,
    /// The search response being reported.
    data: &'a SearchResponse,
    /// Metadata about the invocation.
    meta: SearchMeta,
}

/// Metadata section of the JSON envelope.
#[derive(Serialize)]
struct SearchMeta {
    /// Elapsed time in milliseconds as supplied by the caller
    /// (presumably the wall-clock duration of the search — confirm at the call site).
    elapsed_ms: u64,
}

pub fn print_search_results_json(
    response: &SearchResponse,
    elapsed_ms: u64,
    fields: Option<&[String]>,
) {
    let output = SearchJsonOutput {
        ok: true,
        data: response,
        meta: SearchMeta { elapsed_ms },
    };
    let mut value = match serde_json::to_value(&output) {
        Ok(v) => v,
        Err(e) => {
            eprintln!("Error serializing search response: {e}");
            return;
        }
    };
    if let Some(f) = fields {
        let expanded = crate::cli::robot::expand_fields_preset(f, "search");
        crate::cli::robot::filter_fields(&mut value, "results", &expanded);
    }
    match serde_json::to_string(&value) {
        Ok(json) => println!("{json}"),
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
    }
}

/// Unit tests for the snippet helpers (`truncate_snippet`, `collapse_newlines`).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn truncate_snippet_short_text_unchanged() {
        let s = "hello world";
        assert_eq!(truncate_snippet(s, 100), "hello world");
    }

    #[test]
    fn truncate_snippet_plain_text_truncated() {
        let s = "this is a long string that exceeds the limit";
        let result = truncate_snippet(s, 20);
        assert!(result.ends_with("..."), "got: {result}");
        // Total length, ellipsis included, should be <= 20
        assert!(result.chars().count() <= 20, "got: {result}");
    }

    #[test]
    fn truncate_snippet_preserves_mark_tags() {
        let s = "some text <mark>keyword</mark> and more text here that is long";
        let result = truncate_snippet(s, 30);
        // Should not cut inside a <mark> pair
        let open_count = result.matches("<mark>").count();
        let close_count = result.matches("</mark>").count();
        assert_eq!(open_count, close_count, "unbalanced tags in: {result}");
    }

    #[test]
    fn truncate_snippet_cuts_before_mark_tag() {
        let s = "a]very long prefix that exceeds the limit <mark>word</mark>";
        let result = truncate_snippet(s, 15);
        assert!(result.ends_with("..."), "got: {result}");
        // The <mark> tag should not appear since we truncated before reaching it
        assert!(
            !result.contains("<mark>"),
            "should not include tag: {result}"
        );
    }

    #[test]
    fn truncate_snippet_does_not_count_tags_as_visible() {
        // With tags, raw length is 39 chars. Without tags, visible is 26.
        let s = "prefix <mark>keyword</mark> suffix text";
        // If max_visible = 35, the visible text (26 chars) fits — should NOT truncate
        let result = truncate_snippet(s, 35);
        assert_eq!(result, s, "should not truncate when visible text fits");
    }

    #[test]
    fn truncate_snippet_small_limit_returns_as_is() {
        let s = "text <mark>x</mark>";
        // Very small limit should return as-is (guard clause)
        assert_eq!(truncate_snippet(s, 3), s);
    }

    #[test]
    fn collapse_newlines_flattens_multiline_metadata() {
        let s = "[[Issue]] #4018: Remove math.js\nProject: vs/typescript-code\nURL: https://example.com\nLabels: []";
        let result = collapse_newlines(s);
        assert!(
            !result.contains('\n'),
            "should not contain newlines: {result}"
        );
        assert_eq!(
            result,
            "[[Issue]] #4018: Remove math.js Project: vs/typescript-code URL: https://example.com Labels: []"
        );
    }

    #[test]
    fn collapse_newlines_preserves_mark_tags() {
        let s = "first line\n<mark>keyword</mark>\nsecond line";
        let result = collapse_newlines(s);
        assert_eq!(result, "first line <mark>keyword</mark> second line");
    }

    #[test]
    fn collapse_newlines_collapses_runs_of_whitespace() {
        let s = "a  \n\n  b\t\tc";
        let result = collapse_newlines(s);
        assert_eq!(result, "a b c");
    }
}