Files
gitlore/src/core/timeline_seed.rs
teernisse c10471ddb9 feat(timeline): add entity-direct seeding (issue:N, mr:N syntax)
Adds issue:N / i:N / mr:N / m:N query syntax to bypass hybrid search
and seed the timeline directly from a known entity. All discussions for
the entity are gathered without needing Ollama.

- parse_timeline_query() detects entity-direct patterns
- resolve_entity_by_iid() resolves IID to EntityRef with ambiguity handling
- seed_timeline_direct() gathers all discussions for the entity
- 20 new tests (5 resolve, 6 direct seed, 9 parse)
- Updated CLI help text and robot-docs manifest
2026-02-13 15:22:45 -05:00

358 lines
12 KiB
Rust

use std::collections::HashSet;
use rusqlite::Connection;
use tracing::debug;
use crate::core::error::Result;
use crate::core::timeline::{
EntityRef, MatchedDiscussion, TimelineEvent, TimelineEventType, resolve_entity_by_iid,
resolve_entity_ref, truncate_to_chars,
};
use crate::embedding::ollama::OllamaClient;
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
/// Result of the seed + hydrate phases.
///
/// Returned by both the search-based path (`seed_timeline`) and the entity-direct
/// path (`seed_timeline_direct`), so later pipeline stages can treat them uniformly.
pub struct SeedResult {
/// Entities the timeline is seeded from.
pub seed_entities: Vec<EntityRef>,
/// Note-level evidence events. The entity-direct path always leaves this empty.
pub evidence_notes: Vec<TimelineEvent>,
/// Discussions matched during seeding, to be collected as full threads.
pub matched_discussions: Vec<MatchedDiscussion>,
/// Label describing how seeding was performed. The values assigned in this file are
/// `"hybrid"`, `"lexical"`, `"lexical (hybrid fallback)"` and `"direct"`.
pub search_mode: String,
}
/// Run the SEED + HYDRATE phases of the timeline pipeline.
///
/// * **SEED** — hybrid search (FTS + vector via RRF) over documents, yielding matched
///   document IDs.
/// * **HYDRATE** — map those document IDs to their source entities and gather the top
///   FTS-matched notes as evidence.
///
/// When `client` is `None` or Ollama is unavailable, the search falls back to FTS-only.
/// Discussion documents are resolved to their parent entity (issue or MR), entities are
/// deduplicated and capped at `max_seeds`, and evidence notes are capped at `max_evidence`.
///
/// If `query` sanitizes to an empty FTS query, an empty result labelled `"lexical"` is
/// returned without touching the database.
///
/// # Errors
///
/// Propagates any error from the hybrid search, entity resolution, or evidence lookup.
pub async fn seed_timeline(
    conn: &Connection,
    client: Option<&OllamaClient>,
    query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_seeds: usize,
    max_evidence: usize,
) -> Result<SeedResult> {
    let lexical_query = to_fts_query(query, FtsQueryMode::Safe);
    if lexical_query.is_empty() {
        // Nothing searchable survived sanitization.
        return Ok(SeedResult {
            seed_entities: Vec::new(),
            evidence_notes: Vec::new(),
            matched_discussions: Vec::new(),
            search_mode: "lexical".to_owned(),
        });
    }

    // Hybrid search gives better recall than FTS alone for seed discovery, and
    // `search_hybrid` degrades to FTS-only on its own when Ollama is unavailable.
    // The limit is three times `max_seeds` — presumably because several documents
    // can resolve to the same entity and are collapsed during hydration.
    let filters = SearchFilters {
        project_id,
        updated_since: since_ms,
        limit: max_seeds.saturating_mul(3),
        ..SearchFilters::default()
    };
    let (hits, notices) = search_hybrid(
        conn,
        client,
        query,
        SearchMode::Hybrid,
        &filters,
        FtsQueryMode::Safe,
    )
    .await?;

    // A fallback is detected from the wording of the warnings the search emitted.
    let fell_back = notices
        .iter()
        .any(|n| n.contains("falling back") || n.contains("FTS only"));
    let mode_label = if fell_back {
        "lexical (hybrid fallback)"
    } else if client.is_some() && !hits.is_empty() {
        "hybrid"
    } else {
        "lexical"
    };

    for notice in &notices {
        debug!(warning = %notice, "hybrid search warning during timeline seeding");
    }

    let document_ids: Vec<_> = hits.iter().map(|hit| hit.document_id).collect();
    let (seed_entities, matched_discussions) =
        resolve_documents_to_entities(conn, &document_ids, max_seeds)?;

    // Evidence notes stay FTS-only: they are supplementary context and not worth a
    // second embedding call.
    let evidence_notes =
        find_evidence_notes(conn, &lexical_query, project_id, since_ms, max_evidence)?;

    Ok(SeedResult {
        seed_entities,
        evidence_notes,
        matched_discussions,
        search_mode: mode_label.to_owned(),
    })
}
/// Seed the timeline from a single entity identified by IID, skipping search altogether.
///
/// This backs the `issue:42` / `mr:99` query syntax. The entity is resolved first; then
/// every discussion attached to it is returned as a matched discussion (all of them, not
/// only search hits). The result has no evidence notes and its `search_mode` is `"direct"`.
///
/// `entity_type` values other than `"issue"` and `"merge_request"` still yield the
/// resolved entity as the sole seed, but with no discussions.
///
/// # Errors
///
/// Propagates errors from `resolve_entity_by_iid` and from the discussions query.
pub fn seed_timeline_direct(
    conn: &Connection,
    entity_type: &str,
    iid: i64,
    project_id: Option<i64>,
) -> Result<SeedResult> {
    let entity_ref = resolve_entity_by_iid(conn, entity_type, iid, project_id)?;

    // Which `discussions` column links a thread to this kind of entity, if any.
    let owner_column = match entity_type {
        "issue" => Some("issue_id"),
        "merge_request" => Some("merge_request_id"),
        _ => None,
    };

    let matched_discussions: Vec<MatchedDiscussion> = match owner_column {
        None => Vec::new(),
        Some(column) => {
            // `column` comes from the fixed match above, never from caller input, so
            // interpolating it into the SQL text is safe.
            let sql = format!("SELECT id, project_id FROM discussions WHERE {column} = ?1");
            let mut stmt = conn.prepare(&sql)?;
            let rows = stmt.query_map(rusqlite::params![entity_ref.entity_id], |row| {
                Ok(MatchedDiscussion {
                    discussion_id: row.get(0)?,
                    entity_type: entity_type.to_owned(),
                    entity_id: entity_ref.entity_id,
                    project_id: row.get(1)?,
                })
            })?;

            let mut threads = Vec::new();
            for row in rows {
                threads.push(row?);
            }
            threads
        }
    };

    Ok(SeedResult {
        seed_entities: vec![entity_ref],
        evidence_notes: Vec::new(),
        matched_discussions,
        search_mode: "direct".to_owned(),
    })
}
/// Resolve a list of document IDs to deduplicated entity refs and matched discussions.
///
/// Rows are processed in the order of `document_ids`. Issue and merge-request
/// documents map to themselves; discussion and note documents are resolved to their
/// parent entity (issue or MR). Documents of any other source type, and
/// discussions/notes with no parent, are skipped.
///
/// At most `max_entities` entities are returned; a cap of zero yields two empty
/// vectors. Processing stops as soon as the cap is reached, so discussions that only
/// appear in later rows are not captured.
///
/// Returns `(entities, matched_discussions)`.
///
/// # Errors
///
/// Propagates database errors and errors from `resolve_entity_ref`.
fn resolve_documents_to_entities(
    conn: &Connection,
    document_ids: &[i64],
    max_entities: usize,
) -> Result<(Vec<EntityRef>, Vec<MatchedDiscussion>)> {
    // The cap is only checked after an entity has been pushed, so a zero cap must be
    // handled up front; otherwise one entity would still be returned.
    if document_ids.is_empty() || max_entities == 0 {
        return Ok((Vec::new(), Vec::new()));
    }

    let placeholders = vec!["?"; document_ids.len()].join(",");
    // Keeps the caller's ordering: each id is mapped to its position in `document_ids`.
    // The ids are `i64` values, so writing them into the SQL text cannot inject anything.
    let order_clause = document_ids
        .iter()
        .enumerate()
        .map(|(i, id)| format!("WHEN {id} THEN {i}"))
        .collect::<Vec<_>>()
        .join(" ");
    let sql = format!(
        r"
SELECT d.source_type, d.source_id, d.project_id,
COALESCE(disc.issue_id, note_disc.issue_id) AS issue_id,
COALESCE(disc.merge_request_id, note_disc.merge_request_id) AS mr_id,
COALESCE(disc.id, note_disc.id) AS discussion_id
FROM documents d
LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
LEFT JOIN notes n ON n.id = d.source_id AND d.source_type = 'note'
LEFT JOIN discussions note_disc ON note_disc.id = n.discussion_id AND d.source_type = 'note'
WHERE d.id IN ({placeholders})
ORDER BY CASE d.id {order_clause} END
"
    );

    let mut stmt = conn.prepare(&sql)?;
    let params: Vec<&dyn rusqlite::types::ToSql> = document_ids
        .iter()
        .map(|id| id as &dyn rusqlite::types::ToSql)
        .collect();
    let rows = stmt.query_map(params.as_slice(), |row| {
        Ok((
            row.get::<_, String>(0)?,      // source_type
            row.get::<_, i64>(1)?,         // source_id
            row.get::<_, i64>(2)?,         // project_id
            row.get::<_, Option<i64>>(3)?, // issue_id (coalesced)
            row.get::<_, Option<i64>>(4)?, // mr_id (coalesced)
            row.get::<_, Option<i64>>(5)?, // discussion_id (coalesced)
        ))
    })?;

    let mut seen_entities = HashSet::new();
    let mut seen_discussions = HashSet::new();
    let mut entities = Vec::new();
    let mut matched_discussions = Vec::new();

    for row_result in rows {
        let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id, discussion_id) =
            row_result?;

        let (entity_type, entity_id) = match source_type.as_str() {
            "issue" => ("issue".to_owned(), source_id),
            "merge_request" => ("merge_request".to_owned(), source_id),
            "discussion" | "note" => {
                // An issue parent takes precedence over a merge-request parent.
                if let Some(issue_id) = disc_issue_id {
                    ("issue".to_owned(), issue_id)
                } else if let Some(mr_id) = disc_mr_id {
                    ("merge_request".to_owned(), mr_id)
                } else {
                    continue; // orphaned discussion/note
                }
            }
            _ => continue,
        };

        // Capture the matched discussion (deduplicated). This happens before the
        // entity dedup below, so a second thread on an already-seen entity is kept.
        if let Some(disc_id) = discussion_id
            && (source_type == "discussion" || source_type == "note")
            && seen_discussions.insert(disc_id)
        {
            matched_discussions.push(MatchedDiscussion {
                discussion_id: disc_id,
                entity_type: entity_type.clone(),
                entity_id,
                project_id: proj_id,
            });
        }

        // Entity dedup
        let key = (entity_type.clone(), entity_id);
        if !seen_entities.insert(key) {
            continue;
        }

        if let Some(entity_ref) = resolve_entity_ref(conn, &entity_type, entity_id, Some(proj_id))?
        {
            entities.push(entity_ref);
        }

        if entities.len() >= max_entities {
            break;
        }
    }

    Ok((entities, matched_discussions))
}
/// Find evidence notes: non-system notes from FTS5-matched discussions that provide context.
///
/// The query matches `fts_query` against discussion documents, joins every non-system
/// note of each matched discussion, orders by FTS rank and returns at most
/// `max_evidence` note rows. `project_id` and `since_ms` are optional filters on the
/// document's project and `updated_at`; `None` disables the filter.
///
/// Each surviving row becomes a `TimelineEvent` of type `NoteEvidence` with a snippet of
/// up to 200 characters, marked `is_seed`. Rows are skipped (not treated as errors) when
/// the discussion has neither an issue nor a merge-request parent, or when the parent
/// entity cannot be resolved. Skipped rows still count towards the SQL `LIMIT`.
///
/// # Errors
///
/// Propagates database errors and errors from `resolve_entity_ref`.
fn find_evidence_notes(
    conn: &Connection,
    fts_query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_evidence: usize,
) -> Result<Vec<TimelineEvent>> {
    let sql = r"
SELECT n.id AS note_id, n.body, n.created_at, n.author_username,
disc.id AS discussion_id,
CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END AS parent_type,
COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id,
d.project_id
FROM documents_fts
JOIN documents d ON d.id = documents_fts.rowid
JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
JOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0
WHERE documents_fts MATCH ?1
AND (?2 IS NULL OR d.project_id = ?2)
AND (?3 IS NULL OR d.updated_at >= ?3)
ORDER BY rank
LIMIT ?4
";
    // Saturate rather than wrap: a plain `as` cast would turn an oversized cap into a
    // negative LIMIT.
    let limit = i64::try_from(max_evidence).unwrap_or(i64::MAX);

    let mut stmt = conn.prepare(sql)?;
    let rows = stmt.query_map(
        rusqlite::params![fts_query, project_id, since_ms, limit],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,            // note_id
                row.get::<_, Option<String>>(1)?, // body
                row.get::<_, i64>(2)?,            // created_at
                row.get::<_, Option<String>>(3)?, // author
                row.get::<_, i64>(4)?,            // discussion_id
                row.get::<_, String>(5)?,         // parent_type
                // NULL when the discussion has neither an issue nor an MR parent;
                // reading it as a bare `i64` would fail the whole query.
                row.get::<_, Option<i64>>(6)?, // parent_entity_id
                row.get::<_, i64>(7)?,         // project_id
            ))
        },
    )?;

    let mut events = Vec::new();
    for row_result in rows {
        let (
            note_id,
            body,
            created_at,
            author,
            discussion_id,
            parent_type,
            parent_entity_id,
            proj_id,
        ) = row_result?;

        let Some(parent_entity_id) = parent_entity_id else {
            debug!(
                note_id,
                discussion_id,
                "Skipping evidence note: discussion has no parent issue or merge request"
            );
            continue;
        };

        let Some(parent) = resolve_entity_ref(conn, &parent_type, parent_entity_id, Some(proj_id))?
        else {
            debug!(
                parent_type,
                parent_entity_id,
                proj_id,
                "Skipping evidence note: parent entity not found (orphaned discussion)"
            );
            continue;
        };

        // Only build the snippet once the row is known to be kept.
        let snippet = truncate_to_chars(body.as_deref().unwrap_or(""), 200);

        events.push(TimelineEvent {
            timestamp: created_at,
            entity_type: parent_type,
            entity_id: parent_entity_id,
            entity_iid: parent.entity_iid,
            project_path: parent.project_path,
            event_type: TimelineEventType::NoteEvidence {
                note_id,
                snippet,
                discussion_id: Some(discussion_id),
            },
            summary: format!("Note by {}", author.as_deref().unwrap_or("unknown")),
            actor: author,
            url: None,
            is_seed: true,
        });
    }

    Ok(events)
}
// Unit tests live in the sibling file `timeline_seed_tests.rs` and are compiled only
// for test builds.
#[cfg(test)]
#[path = "timeline_seed_tests.rs"]
mod tests;