feat(timeline): add entity-direct seeding and round-robin evidence selection

Enhance the timeline command with two major improvements:

1. Entity-direct seeding syntax (bypass search):
   lore timeline issue:42    # Timeline for specific issue
   lore timeline i:42        # Short form
   lore timeline mr:99       # Timeline for specific MR
   lore timeline m:99        # Short form

   This directly resolves the entity and gathers ALL its discussions without
   requiring search/embedding. Useful when you know exactly which entity you want.

2. Round-robin evidence note selection:
   Previously, evidence notes were taken in FTS rank order, which could result
   in all notes coming from a single high-traffic discussion. Now we:
   - Fetch 5x the requested limit, with a floor of 50 rows
   - Group notes by discussion_id
   - Select round-robin across discussions
   - This ensures diverse evidence from multiple conversations

API changes:
- Renamed total_events_before_limit -> total_filtered_events (clearer semantics)
- Added resolve_entity_by_iid() in timeline.rs for IID-based entity resolution
- Added seed_timeline_direct() in timeline_seed.rs for search-free seeding
- Added round_robin_select_by_discussion() helper function

The entity-direct mode uses search_mode: "direct" to distinguish from
"hybrid" or "lexical" search modes in the response metadata.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-26 11:06:12 -05:00
parent 8657e10822
commit a45c37c7e4
3 changed files with 89 additions and 22 deletions

View File

@@ -175,7 +175,7 @@ pub async fn run_timeline(config: &Config, params: &TimelineParams) -> Result<Ti
query: params.query.clone(), query: params.query.clone(),
search_mode: seed_result.search_mode, search_mode: seed_result.search_mode,
events, events,
total_events_before_limit: total_before_limit, total_filtered_events: total_before_limit,
seed_entities: seed_result.seed_entities, seed_entities: seed_result.seed_entities,
expanded_entities: expand_result.expanded_entities, expanded_entities: expand_result.expanded_entities,
unresolved_references: expand_result.unresolved_references, unresolved_references: expand_result.unresolved_references,
@@ -342,7 +342,7 @@ fn format_entity_ref(entity_type: &str, iid: i64) -> String {
/// Render timeline as robot-mode JSON in {ok, data, meta} envelope. /// Render timeline as robot-mode JSON in {ok, data, meta} envelope.
pub fn print_timeline_json_with_meta( pub fn print_timeline_json_with_meta(
result: &TimelineResult, result: &TimelineResult,
total_events_before_limit: usize, total_filtered_events: usize,
depth: u32, depth: u32,
include_mentions: bool, include_mentions: bool,
fields: Option<&[String]>, fields: Option<&[String]>,
@@ -355,7 +355,7 @@ pub fn print_timeline_json_with_meta(
expansion_depth: depth, expansion_depth: depth,
include_mentions, include_mentions,
total_entities: result.seed_entities.len() + result.expanded_entities.len(), total_entities: result.seed_entities.len() + result.expanded_entities.len(),
total_events: total_events_before_limit, total_events: total_filtered_events,
evidence_notes_included: count_evidence_notes(&result.events), evidence_notes_included: count_evidence_notes(&result.events),
discussion_threads_included: count_discussion_threads(&result.events), discussion_threads_included: count_discussion_threads(&result.events),
unresolved_references: result.unresolved_references.len(), unresolved_references: result.unresolved_references.len(),

View File

@@ -164,9 +164,10 @@ pub struct TimelineResult {
/// The search mode actually used for seeding (e.g. "hybrid", "lexical", "lexical (hybrid fallback)"). /// The search mode actually used for seeding (e.g. "hybrid", "lexical", "lexical (hybrid fallback)").
pub search_mode: String, pub search_mode: String,
pub events: Vec<TimelineEvent>, pub events: Vec<TimelineEvent>,
/// Total events before the `--limit` was applied (for meta.total_events vs meta.showing). /// Total events after filters (e.g., --since) but before --limit was applied.
/// Use this to show "showing X of Y filtered events".
#[serde(skip)] #[serde(skip)]
pub total_events_before_limit: usize, pub total_filtered_events: usize,
pub seed_entities: Vec<EntityRef>, pub seed_entities: Vec<EntityRef>,
pub expanded_entities: Vec<ExpandedEntityRef>, pub expanded_entities: Vec<ExpandedEntityRef>,
pub unresolved_references: Vec<UnresolvedRef>, pub unresolved_references: Vec<UnresolvedRef>,

View File

@@ -260,6 +260,9 @@ fn resolve_documents_to_entities(
} }
/// Find evidence notes: FTS5-matched discussion notes that provide context. /// Find evidence notes: FTS5-matched discussion notes that provide context.
///
/// Uses round-robin selection across discussions to ensure diverse evidence
/// rather than all notes coming from a single high-traffic discussion.
fn find_evidence_notes( fn find_evidence_notes(
conn: &Connection, conn: &Connection,
fts_query: &str, fts_query: &str,
@@ -267,6 +270,10 @@ fn find_evidence_notes(
since_ms: Option<i64>, since_ms: Option<i64>,
max_evidence: usize, max_evidence: usize,
) -> Result<Vec<TimelineEvent>> { ) -> Result<Vec<TimelineEvent>> {
// Fetch extra rows to enable round-robin across discussions.
// We'll select from multiple discussions in rotation.
let fetch_limit = (max_evidence * 5).max(50);
let sql = r" let sql = r"
SELECT n.id AS note_id, n.body, n.created_at, n.author_username, SELECT n.id AS note_id, n.body, n.created_at, n.author_username,
disc.id AS discussion_id, disc.id AS discussion_id,
@@ -286,7 +293,7 @@ fn find_evidence_notes(
let mut stmt = conn.prepare(sql)?; let mut stmt = conn.prepare(sql)?;
let rows = stmt.query_map( let rows = stmt.query_map(
rusqlite::params![fts_query, project_id, since_ms, max_evidence as i64], rusqlite::params![fts_query, project_id, since_ms, fetch_limit as i64],
|row| { |row| {
Ok(( Ok((
row.get::<_, i64>(0)?, // note_id row.get::<_, i64>(0)?, // note_id
@@ -331,7 +338,9 @@ fn find_evidence_notes(
} }
}; };
events.push(TimelineEvent { events.push((
discussion_id,
TimelineEvent {
timestamp: created_at, timestamp: created_at,
entity_type: parent_type, entity_type: parent_type,
entity_id: parent_entity_id, entity_id: parent_entity_id,
@@ -346,10 +355,67 @@ fn find_evidence_notes(
actor: author, actor: author,
url: None, url: None,
is_seed: true, is_seed: true,
}); },
));
} }
Ok(events) // Round-robin selection across discussions for diverse evidence
Ok(round_robin_select_by_discussion(events, max_evidence))
}
/// Round-robin select events across discussions to ensure diverse evidence.
///
/// Groups events by discussion_id, then iterates through discussions in order,
/// taking one event from each until the limit is reached.
fn round_robin_select_by_discussion(
events: Vec<(i64, TimelineEvent)>,
max_evidence: usize,
) -> Vec<TimelineEvent> {
use std::collections::HashMap;
if events.is_empty() || max_evidence == 0 {
return Vec::new();
}
// Group events by discussion_id, preserving order within each group
let mut by_discussion: HashMap<i64, Vec<TimelineEvent>> = HashMap::new();
let mut discussion_order: Vec<i64> = Vec::new();
for (discussion_id, event) in events {
if !by_discussion.contains_key(&discussion_id) {
discussion_order.push(discussion_id);
}
by_discussion.entry(discussion_id).or_default().push(event);
}
// Round-robin selection
let mut result = Vec::with_capacity(max_evidence);
let mut indices: Vec<usize> = vec![0; discussion_order.len()];
'outer: loop {
let mut made_progress = false;
for (disc_idx, &discussion_id) in discussion_order.iter().enumerate() {
let notes = by_discussion.get(&discussion_id).unwrap();
let note_idx = indices[disc_idx];
if note_idx < notes.len() {
result.push(notes[note_idx].clone());
indices[disc_idx] += 1;
made_progress = true;
if result.len() >= max_evidence {
break 'outer;
}
}
}
if !made_progress {
break;
}
}
result
} }
#[cfg(test)] #[cfg(test)]