diff --git a/src/cli/commands/timeline.rs b/src/cli/commands/timeline.rs index d8fcf3b..beb3989 100644 --- a/src/cli/commands/timeline.rs +++ b/src/cli/commands/timeline.rs @@ -13,6 +13,7 @@ use crate::core::timeline::{ use crate::core::timeline_collect::collect_events; use crate::core::timeline_expand::expand_timeline; use crate::core::timeline_seed::seed_timeline; +use crate::embedding::ollama::{OllamaClient, OllamaConfig}; /// Parameters for running the timeline pipeline. pub struct TimelineParams { @@ -28,7 +29,7 @@ pub struct TimelineParams { } /// Run the full timeline pipeline: SEED -> EXPAND -> COLLECT. -pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result { +pub async fn run_timeline(config: &Config, params: &TimelineParams) -> Result { let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; @@ -50,15 +51,25 @@ pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result Result, /// Total events before the `--limit` was applied (for meta.total_events vs meta.showing). #[serde(skip)] diff --git a/src/core/timeline_seed.rs b/src/core/timeline_seed.rs index 8b0702b..1610211 100644 --- a/src/core/timeline_seed.rs +++ b/src/core/timeline_seed.rs @@ -5,23 +5,28 @@ use tracing::debug; use crate::core::error::Result; use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType, resolve_entity_ref}; -use crate::search::{FtsQueryMode, to_fts_query}; +use crate::embedding::ollama::OllamaClient; +use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query}; /// Result of the seed + hydrate phases. pub struct SeedResult { pub seed_entities: Vec<EntityRef>, pub evidence_notes: Vec<TimelineEvent>, + /// The search mode actually used (hybrid with fallback info). + pub search_mode: String, } /// Run the SEED + HYDRATE phases of the timeline pipeline. /// -/// 1. SEED: FTS5 keyword search over documents -> matched document IDs +/// 1. 
SEED: Hybrid search (FTS + vector via RRF) over documents -> matched document IDs /// 2. HYDRATE: Map document IDs -> source entities + top matched notes as evidence /// +/// When `client` is `None` or Ollama is unavailable, falls back to FTS-only search. /// Discussion documents are resolved to their parent entity (issue or MR). /// Entities are deduplicated. Evidence notes are capped at `max_evidence`. -pub fn seed_timeline( +pub async fn seed_timeline( conn: &Connection, + client: Option<&OllamaClient>, query: &str, project_id: Option<i64>, since_ms: Option<i64>, @@ -33,57 +38,110 @@ pub fn seed_timeline( return Ok(SeedResult { seed_entities: Vec::new(), evidence_notes: Vec::new(), + search_mode: "lexical".to_owned(), }); } - let seed_entities = find_seed_entities(conn, &fts_query, project_id, since_ms, max_seeds)?; + // Use hybrid search for seed entity discovery (better recall than FTS alone). + // search_hybrid gracefully falls back to FTS-only when Ollama is unavailable. + let filters = SearchFilters { + project_id, + updated_since: since_ms, + limit: max_seeds.saturating_mul(3), + ..SearchFilters::default() + }; + + let (hybrid_results, warnings) = search_hybrid( + conn, + client, + query, + SearchMode::Hybrid, + &filters, + FtsQueryMode::Safe, + ) + .await?; + + let search_mode = if warnings + .iter() + .any(|w| w.contains("falling back") || w.contains("FTS only")) + { + "lexical (hybrid fallback)".to_owned() + } else if client.is_some() && !hybrid_results.is_empty() { + "hybrid".to_owned() + } else { + "lexical".to_owned() + }; + + for w in &warnings { + debug!(warning = %w, "hybrid search warning during timeline seeding"); + } + + let seed_entities = resolve_documents_to_entities( + conn, + &hybrid_results + .iter() + .map(|r| r.document_id) + .collect::<Vec<_>>(), + max_seeds, + )?; + + // Evidence notes stay FTS-only (supplementary context, not worth a second embedding call) let evidence_notes = find_evidence_notes(conn, &fts_query, project_id, since_ms, 
max_evidence)?; Ok(SeedResult { seed_entities, evidence_notes, + search_mode, }) } -/// Find seed entities via FTS5 search, resolving discussions to their parent entity. -fn find_seed_entities( +/// Resolve a list of document IDs to deduplicated entity refs. +/// Discussion documents are resolved to their parent entity (issue or MR). +fn resolve_documents_to_entities( conn: &Connection, - fts_query: &str, - project_id: Option<i64>, - since_ms: Option<i64>, - max_seeds: usize, + document_ids: &[i64], + max_entities: usize, ) -> Result<Vec<EntityRef>> { - let sql = r" + if document_ids.is_empty() { + return Ok(Vec::new()); + } + + let placeholders: String = document_ids + .iter() + .map(|_| "?") + .collect::<Vec<_>>() + .join(","); + let sql = format!( + r" SELECT d.source_type, d.source_id, d.project_id, disc.issue_id, disc.merge_request_id - FROM documents_fts - JOIN documents d ON d.id = documents_fts.rowid + FROM documents d LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion' - WHERE documents_fts MATCH ?1 - AND (?2 IS NULL OR d.project_id = ?2) - AND (?3 IS NULL OR d.updated_at >= ?3) - ORDER BY rank - LIMIT ?4 - "; + WHERE d.id IN ({placeholders}) + ORDER BY CASE d.id {order_clause} END + ", + order_clause = document_ids + .iter() + .enumerate() + .map(|(i, id)| format!("WHEN {id} THEN {i}")) + .collect::<Vec<_>>() + .join(" "), + ); - let mut stmt = conn.prepare(sql)?; - let rows = stmt.query_map( - rusqlite::params![ - fts_query, - project_id, - since_ms, - max_seeds.saturating_mul(3) as i64 - ], - |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, i64>(1)?, - row.get::<_, i64>(2)?, - row.get::<_, Option<i64>>(3)?, - row.get::<_, Option<i64>>(4)?, - )) - }, - )?; + let mut stmt = conn.prepare(&sql)?; + let params: Vec<&dyn rusqlite::types::ToSql> = document_ids + .iter() + .map(|id| id as &dyn rusqlite::types::ToSql) + .collect(); + let rows = stmt.query_map(params.as_slice(), |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, i64>(1)?, + row.get::<_, i64>(2)?, 
+ row.get::<_, Option<i64>>(3)?, + row.get::<_, Option<i64>>(4)?, + )) + })?; let mut seen = HashSet::new(); let mut entities = Vec::new(); @@ -116,7 +174,7 @@ fn find_seed_entities( entities.push(entity_ref); } - if entities.len() >= max_seeds { + if entities.len() >= max_entities { break; } } diff --git a/src/core/timeline_seed_tests.rs b/src/core/timeline_seed_tests.rs index 0256b66..7e4e379 100644 --- a/src/core/timeline_seed_tests.rs +++ b/src/core/timeline_seed_tests.rs @@ -85,16 +85,18 @@ fn insert_note( conn.last_insert_rowid() } -#[test] -fn test_seed_empty_query_returns_empty() { +#[tokio::test] +async fn test_seed_empty_query_returns_empty() { let conn = setup_test_db(); - let result = seed_timeline(&conn, "", None, None, 50, 10).unwrap(); + let result = seed_timeline(&conn, None, "", None, None, 50, 10) + .await + .unwrap(); assert!(result.seed_entities.is_empty()); assert!(result.evidence_notes.is_empty()); } -#[test] -fn test_seed_no_matches_returns_empty() { +#[tokio::test] +async fn test_seed_no_matches_returns_empty() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let issue_id = insert_test_issue(&conn, project_id, 1); @@ -106,12 +108,14 @@ fn test_seed_no_matches_returns_empty() { "unrelated content here", ); - let result = seed_timeline(&conn, "nonexistent_xyzzy_query", None, None, 50, 10).unwrap(); + let result = seed_timeline(&conn, None, "nonexistent_xyzzy_query", None, None, 50, 10) + .await + .unwrap(); assert!(result.seed_entities.is_empty()); } -#[test] -fn test_seed_finds_issue() { +#[tokio::test] +async fn test_seed_finds_issue() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let issue_id = insert_test_issue(&conn, project_id, 42); @@ -123,15 +127,17 @@ fn test_seed_finds_issue() { "authentication error in login flow", ); - let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap(); + let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10) + .await 
+ .unwrap(); assert_eq!(result.seed_entities.len(), 1); assert_eq!(result.seed_entities[0].entity_type, "issue"); assert_eq!(result.seed_entities[0].entity_iid, 42); assert_eq!(result.seed_entities[0].project_path, "group/project"); } -#[test] -fn test_seed_finds_mr() { +#[tokio::test] +async fn test_seed_finds_mr() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let mr_id = insert_test_mr(&conn, project_id, 99); @@ -143,14 +149,16 @@ fn test_seed_finds_mr() { "fix authentication bug", ); - let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap(); + let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10) + .await + .unwrap(); assert_eq!(result.seed_entities.len(), 1); assert_eq!(result.seed_entities[0].entity_type, "merge_request"); assert_eq!(result.seed_entities[0].entity_iid, 99); } -#[test] -fn test_seed_deduplicates_entities() { +#[tokio::test] +async fn test_seed_deduplicates_entities() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let issue_id = insert_test_issue(&conn, project_id, 10); @@ -172,14 +180,16 @@ fn test_seed_deduplicates_entities() { "authentication error second doc", ); - let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap(); + let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10) + .await + .unwrap(); // Should deduplicate: both map to the same issue assert_eq!(result.seed_entities.len(), 1); assert_eq!(result.seed_entities[0].entity_iid, 10); } -#[test] -fn test_seed_resolves_discussion_to_parent() { +#[tokio::test] +async fn test_seed_resolves_discussion_to_parent() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let issue_id = insert_test_issue(&conn, project_id, 7); @@ -192,14 +202,16 @@ fn test_seed_resolves_discussion_to_parent() { "deployment pipeline failed", ); - let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap(); + let result = 
seed_timeline(&conn, None, "deployment", None, None, 50, 10) + .await + .unwrap(); assert_eq!(result.seed_entities.len(), 1); assert_eq!(result.seed_entities[0].entity_type, "issue"); assert_eq!(result.seed_entities[0].entity_iid, 7); } -#[test] -fn test_seed_evidence_capped() { +#[tokio::test] +async fn test_seed_evidence_capped() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let issue_id = insert_test_issue(&conn, project_id, 1); @@ -223,12 +235,14 @@ fn test_seed_evidence_capped() { ); } - let result = seed_timeline(&conn, "deployment", None, None, 50, 5).unwrap(); + let result = seed_timeline(&conn, None, "deployment", None, None, 50, 5) + .await + .unwrap(); assert!(result.evidence_notes.len() <= 5); } -#[test] -fn test_seed_evidence_snippet_truncated() { +#[tokio::test] +async fn test_seed_evidence_snippet_truncated() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); let issue_id = insert_test_issue(&conn, project_id, 1); @@ -244,7 +258,9 @@ fn test_seed_evidence_snippet_truncated() { let long_body = "x".repeat(500); insert_note(&conn, disc_id, project_id, &long_body, false); - let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap(); + let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10) + .await + .unwrap(); assert!(!result.evidence_notes.is_empty()); if let TimelineEventType::NoteEvidence { snippet, .. 
} = &result.evidence_notes[0].event_type { assert!(snippet.chars().count() <= 200); @@ -253,8 +269,8 @@ fn test_seed_evidence_snippet_truncated() { } } -#[test] -fn test_seed_respects_project_filter() { +#[tokio::test] +async fn test_seed_respects_project_filter() { let conn = setup_test_db(); let project_id = insert_test_project(&conn); @@ -285,7 +301,17 @@ fn test_seed_respects_project_filter() { ); // Filter to project 1 only - let result = seed_timeline(&conn, "authentication", Some(project_id), None, 50, 10).unwrap(); + let result = seed_timeline( + &conn, + None, + "authentication", + Some(project_id), + None, + 50, + 10, + ) + .await + .unwrap(); assert_eq!(result.seed_entities.len(), 1); assert_eq!(result.seed_entities[0].project_path, "group/project"); } diff --git a/src/main.rs b/src/main.rs index e0962b2..51a5783 100644 --- a/src/main.rs +++ b/src/main.rs @@ -179,7 +179,9 @@ async fn main() { Some(Commands::Search(args)) => { handle_search(cli.config.as_deref(), args, robot_mode).await } - Some(Commands::Timeline(args)) => handle_timeline(cli.config.as_deref(), args, robot_mode), + Some(Commands::Timeline(args)) => { + handle_timeline(cli.config.as_deref(), args, robot_mode).await + } Some(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode), Some(Commands::Drift { entity_type, @@ -1763,7 +1765,7 @@ async fn handle_stats( Ok(()) } -fn handle_timeline( +async fn handle_timeline( config_override: Option<&str>, args: TimelineArgs, robot_mode: bool, @@ -1784,7 +1786,7 @@ fn handle_timeline( max_evidence: args.max_evidence, }; - let result = run_timeline(&config, ¶ms)?; + let result = run_timeline(&config, ¶ms).await?; if robot_mode { print_timeline_json_with_meta( diff --git a/tests/timeline_pipeline_tests.rs b/tests/timeline_pipeline_tests.rs index 5181b39..05dd8b1 100644 --- a/tests/timeline_pipeline_tests.rs +++ b/tests/timeline_pipeline_tests.rs @@ -108,8 +108,8 @@ fn insert_label_event( /// Full pipeline: seed -> expand -> collect 
for a scenario with an issue /// that has a closing MR, state changes, and label events. -#[test] -fn pipeline_seed_expand_collect_end_to_end() { +#[tokio::test] +async fn pipeline_seed_expand_collect_end_to_end() { let conn = setup_db(); let project_id = insert_project(&conn, "group/project"); @@ -149,7 +149,9 @@ fn pipeline_seed_expand_collect_end_to_end() { insert_label_event(&conn, project_id, Some(issue_id), "bug", 1500); // SEED: find entities matching "authentication" - let seed_result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap(); + let seed_result = seed_timeline(&conn, None, "authentication", None, None, 50, 10) + .await + .unwrap(); assert!( !seed_result.seed_entities.is_empty(), "Seed should find at least one entity" @@ -213,12 +215,14 @@ fn pipeline_seed_expand_collect_end_to_end() { } /// Verify the pipeline handles an empty FTS result gracefully. -#[test] -fn pipeline_empty_query_produces_empty_result() { +#[tokio::test] +async fn pipeline_empty_query_produces_empty_result() { let conn = setup_db(); let _project_id = insert_project(&conn, "group/project"); - let seed_result = seed_timeline(&conn, "", None, None, 50, 10).unwrap(); + let seed_result = seed_timeline(&conn, None, "", None, None, 50, 10) + .await + .unwrap(); assert!(seed_result.seed_entities.is_empty()); let expand_result = expand_timeline(&conn, &seed_result.seed_entities, 1, false, 100).unwrap(); @@ -237,8 +241,8 @@ fn pipeline_empty_query_produces_empty_result() { } /// Verify since filter propagates through the full pipeline. 
-#[test] -fn pipeline_since_filter_excludes_old_events() { +#[tokio::test] +async fn pipeline_since_filter_excludes_old_events() { let conn = setup_db(); let project_id = insert_project(&conn, "group/project"); @@ -255,7 +259,9 @@ fn pipeline_since_filter_excludes_old_events() { insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 2000); insert_state_event(&conn, project_id, Some(issue_id), None, "reopened", 8000); - let seed_result = seed_timeline(&conn, "deploy", None, None, 50, 10).unwrap(); + let seed_result = seed_timeline(&conn, None, "deploy", None, None, 50, 10) + .await + .unwrap(); let expand_result = expand_timeline(&conn, &seed_result.seed_entities, 0, false, 100).unwrap(); // Collect with since=5000: should exclude Created(1000) and closed(2000) @@ -274,8 +280,8 @@ fn pipeline_since_filter_excludes_old_events() { } /// Verify unresolved references use Option for target_iid. -#[test] -fn pipeline_unresolved_refs_have_optional_iid() { +#[tokio::test] +async fn pipeline_unresolved_refs_have_optional_iid() { let conn = setup_db(); let project_id = insert_project(&conn, "group/project"); @@ -302,7 +308,9 @@ fn pipeline_unresolved_refs_have_optional_iid() { ) .unwrap(); - let seed_result = seed_timeline(&conn, "cross project", None, None, 50, 10).unwrap(); + let seed_result = seed_timeline(&conn, None, "cross project", None, None, 50, 10) + .await + .unwrap(); let expand_result = expand_timeline(&conn, &seed_result.seed_entities, 1, false, 100).unwrap(); assert_eq!(expand_result.unresolved_references.len(), 2);