feat(timeline): upgrade seed phase to hybrid search

Replace FTS-only seed entity discovery with hybrid search (FTS + vector
via RRF), using the same search_hybrid infrastructure as the search
command. Falls back gracefully to FTS-only when Ollama is unavailable.

Changes:
- seed_timeline() now accepts OllamaClient, delegates to search_hybrid
- New resolve_documents_to_entities() replaces find_seed_entities()
- SeedResult gains search_mode field tracking actual mode used
- TimelineResult carries search_mode through to JSON renderer
- run_timeline wires up OllamaClient from config
- handle_timeline made async for the hybrid search await
- Tests updated for new function signatures

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-13 13:50:14 -05:00
parent e6771709f1
commit 4f3ec72923
6 changed files with 192 additions and 84 deletions

View File

@@ -13,6 +13,7 @@ use crate::core::timeline::{
use crate::core::timeline_collect::collect_events;
use crate::core::timeline_expand::expand_timeline;
use crate::core::timeline_seed::seed_timeline;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
/// Parameters for running the timeline pipeline.
pub struct TimelineParams {
@@ -28,7 +29,7 @@ pub struct TimelineParams {
}
/// Run the full timeline pipeline: SEED -> EXPAND -> COLLECT.
pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result<TimelineResult> {
pub async fn run_timeline(config: &Config, params: &TimelineParams) -> Result<TimelineResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
@@ -50,15 +51,25 @@ pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result<Timeline
})
.transpose()?;
// Stage 1+2: SEED + HYDRATE
// Construct OllamaClient for hybrid search (same pattern as run_search)
let ollama_cfg = &config.embedding;
let client = OllamaClient::new(OllamaConfig {
base_url: ollama_cfg.base_url.clone(),
model: ollama_cfg.model.clone(),
..OllamaConfig::default()
});
// Stage 1+2: SEED + HYDRATE (hybrid search with FTS fallback)
let seed_result = seed_timeline(
&conn,
Some(&client),
&params.query,
project_id,
since_ms,
params.max_seeds,
params.max_evidence,
)?;
)
.await?;
// Stage 3: EXPAND
let expand_result = expand_timeline(
@@ -81,6 +92,7 @@ pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result<Timeline
Ok(TimelineResult {
query: params.query.clone(),
search_mode: seed_result.search_mode,
events,
total_events_before_limit: total_before_limit,
seed_entities: seed_result.seed_entities,
@@ -258,7 +270,7 @@ pub fn print_timeline_json_with_meta(
ok: true,
data: TimelineDataJson::from_result(result),
meta: TimelineMetaJson {
search_mode: "lexical".to_owned(),
search_mode: result.search_mode.clone(),
expansion_depth: depth,
expand_mentions,
total_entities: result.seed_entities.len() + result.expanded_entities.len(),

View File

@@ -118,6 +118,8 @@ pub struct UnresolvedRef {
#[derive(Debug, Clone, Serialize)]
pub struct TimelineResult {
pub query: String,
/// The search mode actually used for seeding (e.g. "hybrid", "lexical", "lexical (hybrid fallback)").
pub search_mode: String,
pub events: Vec<TimelineEvent>,
/// Total events before the `--limit` was applied (for meta.total_events vs meta.showing).
#[serde(skip)]

View File

@@ -5,23 +5,28 @@ use tracing::debug;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType, resolve_entity_ref};
use crate::search::{FtsQueryMode, to_fts_query};
use crate::embedding::ollama::OllamaClient;
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
/// Result of the seed + hydrate phases.
pub struct SeedResult {
/// Deduplicated seed entities discovered by the search; discussion
/// documents are resolved to their parent entity (issue or MR).
pub seed_entities: Vec<EntityRef>,
/// Top matched notes attached as supplementary evidence, capped at
/// the caller-supplied `max_evidence`.
pub evidence_notes: Vec<TimelineEvent>,
/// The search mode actually used (hybrid with fallback info).
pub search_mode: String,
}
/// Run the SEED + HYDRATE phases of the timeline pipeline.
///
/// 1. SEED: FTS5 keyword search over documents -> matched document IDs
/// 1. SEED: Hybrid search (FTS + vector via RRF) over documents -> matched document IDs
/// 2. HYDRATE: Map document IDs -> source entities + top matched notes as evidence
///
/// When `client` is `None` or Ollama is unavailable, falls back to FTS-only search.
/// Discussion documents are resolved to their parent entity (issue or MR).
/// Entities are deduplicated. Evidence notes are capped at `max_evidence`.
pub fn seed_timeline(
pub async fn seed_timeline(
conn: &Connection,
client: Option<&OllamaClient>,
query: &str,
project_id: Option<i64>,
since_ms: Option<i64>,
@@ -33,57 +38,110 @@ pub fn seed_timeline(
return Ok(SeedResult {
seed_entities: Vec::new(),
evidence_notes: Vec::new(),
search_mode: "lexical".to_owned(),
});
}
let seed_entities = find_seed_entities(conn, &fts_query, project_id, since_ms, max_seeds)?;
// Use hybrid search for seed entity discovery (better recall than FTS alone).
// search_hybrid gracefully falls back to FTS-only when Ollama is unavailable.
let filters = SearchFilters {
project_id,
updated_since: since_ms,
limit: max_seeds.saturating_mul(3),
..SearchFilters::default()
};
let (hybrid_results, warnings) = search_hybrid(
conn,
client,
query,
SearchMode::Hybrid,
&filters,
FtsQueryMode::Safe,
)
.await?;
let search_mode = if warnings
.iter()
.any(|w| w.contains("falling back") || w.contains("FTS only"))
{
"lexical (hybrid fallback)".to_owned()
} else if client.is_some() && !hybrid_results.is_empty() {
"hybrid".to_owned()
} else {
"lexical".to_owned()
};
for w in &warnings {
debug!(warning = %w, "hybrid search warning during timeline seeding");
}
let seed_entities = resolve_documents_to_entities(
conn,
&hybrid_results
.iter()
.map(|r| r.document_id)
.collect::<Vec<_>>(),
max_seeds,
)?;
// Evidence notes stay FTS-only (supplementary context, not worth a second embedding call)
let evidence_notes = find_evidence_notes(conn, &fts_query, project_id, since_ms, max_evidence)?;
Ok(SeedResult {
seed_entities,
evidence_notes,
search_mode,
})
}
/// Find seed entities via FTS5 search, resolving discussions to their parent entity.
fn find_seed_entities(
/// Resolve a list of document IDs to deduplicated entity refs.
/// Discussion documents are resolved to their parent entity (issue or MR).
fn resolve_documents_to_entities(
conn: &Connection,
fts_query: &str,
project_id: Option<i64>,
since_ms: Option<i64>,
max_seeds: usize,
document_ids: &[i64],
max_entities: usize,
) -> Result<Vec<EntityRef>> {
let sql = r"
if document_ids.is_empty() {
return Ok(Vec::new());
}
let placeholders: String = document_ids
.iter()
.map(|_| "?")
.collect::<Vec<_>>()
.join(",");
let sql = format!(
r"
SELECT d.source_type, d.source_id, d.project_id,
disc.issue_id, disc.merge_request_id
FROM documents_fts
JOIN documents d ON d.id = documents_fts.rowid
FROM documents d
LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
WHERE documents_fts MATCH ?1
AND (?2 IS NULL OR d.project_id = ?2)
AND (?3 IS NULL OR d.updated_at >= ?3)
ORDER BY rank
LIMIT ?4
";
WHERE d.id IN ({placeholders})
ORDER BY CASE d.id {order_clause} END
",
order_clause = document_ids
.iter()
.enumerate()
.map(|(i, id)| format!("WHEN {id} THEN {i}"))
.collect::<Vec<_>>()
.join(" "),
);
let mut stmt = conn.prepare(sql)?;
let rows = stmt.query_map(
rusqlite::params![
fts_query,
project_id,
since_ms,
max_seeds.saturating_mul(3) as i64
],
|row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, Option<i64>>(4)?,
))
},
)?;
let mut stmt = conn.prepare(&sql)?;
let params: Vec<&dyn rusqlite::types::ToSql> = document_ids
.iter()
.map(|id| id as &dyn rusqlite::types::ToSql)
.collect();
let rows = stmt.query_map(params.as_slice(), |row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, Option<i64>>(4)?,
))
})?;
let mut seen = HashSet::new();
let mut entities = Vec::new();
@@ -116,7 +174,7 @@ fn find_seed_entities(
entities.push(entity_ref);
}
if entities.len() >= max_seeds {
if entities.len() >= max_entities {
break;
}
}

View File

@@ -85,16 +85,18 @@ fn insert_note(
conn.last_insert_rowid()
}
#[test]
fn test_seed_empty_query_returns_empty() {
#[tokio::test]
async fn test_seed_empty_query_returns_empty() {
let conn = setup_test_db();
let result = seed_timeline(&conn, "", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "", None, None, 50, 10)
.await
.unwrap();
assert!(result.seed_entities.is_empty());
assert!(result.evidence_notes.is_empty());
}
#[test]
fn test_seed_no_matches_returns_empty() {
#[tokio::test]
async fn test_seed_no_matches_returns_empty() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 1);
@@ -106,12 +108,14 @@ fn test_seed_no_matches_returns_empty() {
"unrelated content here",
);
let result = seed_timeline(&conn, "nonexistent_xyzzy_query", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "nonexistent_xyzzy_query", None, None, 50, 10)
.await
.unwrap();
assert!(result.seed_entities.is_empty());
}
#[test]
fn test_seed_finds_issue() {
#[tokio::test]
async fn test_seed_finds_issue() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 42);
@@ -123,15 +127,17 @@ fn test_seed_finds_issue() {
"authentication error in login flow",
);
let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_type, "issue");
assert_eq!(result.seed_entities[0].entity_iid, 42);
assert_eq!(result.seed_entities[0].project_path, "group/project");
}
#[test]
fn test_seed_finds_mr() {
#[tokio::test]
async fn test_seed_finds_mr() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let mr_id = insert_test_mr(&conn, project_id, 99);
@@ -143,14 +149,16 @@ fn test_seed_finds_mr() {
"fix authentication bug",
);
let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_type, "merge_request");
assert_eq!(result.seed_entities[0].entity_iid, 99);
}
#[test]
fn test_seed_deduplicates_entities() {
#[tokio::test]
async fn test_seed_deduplicates_entities() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 10);
@@ -172,14 +180,16 @@ fn test_seed_deduplicates_entities() {
"authentication error second doc",
);
let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
// Should deduplicate: both map to the same issue
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_iid, 10);
}
#[test]
fn test_seed_resolves_discussion_to_parent() {
#[tokio::test]
async fn test_seed_resolves_discussion_to_parent() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 7);
@@ -192,14 +202,16 @@ fn test_seed_resolves_discussion_to_parent() {
"deployment pipeline failed",
);
let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_type, "issue");
assert_eq!(result.seed_entities[0].entity_iid, 7);
}
#[test]
fn test_seed_evidence_capped() {
#[tokio::test]
async fn test_seed_evidence_capped() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 1);
@@ -223,12 +235,14 @@ fn test_seed_evidence_capped() {
);
}
let result = seed_timeline(&conn, "deployment", None, None, 50, 5).unwrap();
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 5)
.await
.unwrap();
assert!(result.evidence_notes.len() <= 5);
}
#[test]
fn test_seed_evidence_snippet_truncated() {
#[tokio::test]
async fn test_seed_evidence_snippet_truncated() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 1);
@@ -244,7 +258,9 @@ fn test_seed_evidence_snippet_truncated() {
let long_body = "x".repeat(500);
insert_note(&conn, disc_id, project_id, &long_body, false);
let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
.await
.unwrap();
assert!(!result.evidence_notes.is_empty());
if let TimelineEventType::NoteEvidence { snippet, .. } = &result.evidence_notes[0].event_type {
assert!(snippet.chars().count() <= 200);
@@ -253,8 +269,8 @@ fn test_seed_evidence_snippet_truncated() {
}
}
#[test]
fn test_seed_respects_project_filter() {
#[tokio::test]
async fn test_seed_respects_project_filter() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
@@ -285,7 +301,17 @@ fn test_seed_respects_project_filter() {
);
// Filter to project 1 only
let result = seed_timeline(&conn, "authentication", Some(project_id), None, 50, 10).unwrap();
let result = seed_timeline(
&conn,
None,
"authentication",
Some(project_id),
None,
50,
10,
)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].project_path, "group/project");
}

View File

@@ -179,7 +179,9 @@ async fn main() {
Some(Commands::Search(args)) => {
handle_search(cli.config.as_deref(), args, robot_mode).await
}
Some(Commands::Timeline(args)) => handle_timeline(cli.config.as_deref(), args, robot_mode),
Some(Commands::Timeline(args)) => {
handle_timeline(cli.config.as_deref(), args, robot_mode).await
}
Some(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode),
Some(Commands::Drift {
entity_type,
@@ -1763,7 +1765,7 @@ async fn handle_stats(
Ok(())
}
fn handle_timeline(
async fn handle_timeline(
config_override: Option<&str>,
args: TimelineArgs,
robot_mode: bool,
@@ -1784,7 +1786,7 @@ fn handle_timeline(
max_evidence: args.max_evidence,
};
let result = run_timeline(&config, &params)?;
let result = run_timeline(&config, &params).await?;
if robot_mode {
print_timeline_json_with_meta(