feat: Implement Gate 3 timeline pipeline and Gate 4 migration scaffolding

Complete 5 beads for the Phase B temporal intelligence feature:

- bd-1oo: Register migration 015 (commit SHAs, closes watermark) and
  create migration 016 (mr_file_changes table with 4 indexes for
  Gate 4 file-history)

- bd-20e: Define TimelineEvent model with 9 event type variants,
  EntityRef, ExpandedEntityRef, UnresolvedRef, and TimelineResult
  types. Ord impl for chronological sorting with stable tiebreak.

- bd-32q: Implement timeline seed phase - FTS5 keyword search to
  entity IDs with discussion-to-parent resolution, entity dedup,
  and evidence note extraction with snippet truncation.

- bd-ypa: Implement timeline expand phase - BFS cross-reference
  expansion over entity_references with bidirectional traversal,
  depth limiting, mention filtering, provenance tracking, and
  unresolved reference collection.

- bd-3as: Implement timeline event collection - gathers Created,
  StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged,
  and NoteEvidence events. Merged dedup (state=merged -> Merged
  variant only). NULL label/milestone fallbacks. Chronological
  interleaving with since filter and limit.

38 new tests, all 445 tests pass. All quality gates clean.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 16:54:28 -05:00
parent d1b2b5fa7d
commit 3767c33c28
9 changed files with 2143 additions and 6 deletions

View File

@@ -44,6 +44,14 @@ const MIGRATIONS: &[(&str, &str)] = &[
"014",
include_str!("../../migrations/014_sync_runs_enrichment.sql"),
),
(
"015",
include_str!("../../migrations/015_commit_shas_and_closes_watermark.sql"),
),
(
"016",
include_str!("../../migrations/016_mr_file_changes.sql"),
),
];
pub fn create_connection(db_path: &Path) -> Result<Connection> {

View File

@@ -14,6 +14,10 @@ pub mod project;
pub mod references;
pub mod sync_run;
pub mod time;
pub mod timeline;
pub mod timeline_collect;
pub mod timeline_expand;
pub mod timeline_seed;
pub use config::Config;
pub use error::{LoreError, Result};

253
src/core/timeline.rs Normal file
View File

@@ -0,0 +1,253 @@
use std::cmp::Ordering;
use serde::Serialize;
/// The core timeline event. All pipeline stages produce or consume these.
/// Spec ref: Section 3.3 "Event Model"
///
/// Equality and ordering (see the trait impls in this file) look only at
/// `timestamp`, `entity_id` and the event-type variant; every other field is
/// ignored for comparison purposes.
#[derive(Debug, Clone, Serialize)]
pub struct TimelineEvent {
/// When the event happened; the primary sort key.
/// NOTE(review): presumably epoch milliseconds (the collector compares it against `since_ms`) — confirm.
pub timestamp: i64,
/// Kind of the owning entity; the collectors recognise `"issue"` and `"merge_request"`.
pub entity_type: String,
/// Local database row id of the owning entity. Skipped during serialization.
#[serde(skip)]
pub entity_id: i64,
/// The owning entity's `iid`, used in human-readable summaries.
pub entity_iid: i64,
/// Path of the project the owning entity belongs to.
pub project_path: String,
/// What happened, including any variant-specific payload.
pub event_type: TimelineEventType,
/// Human-readable one-line description of the event.
pub summary: String,
/// Username of whoever caused the event, when known.
pub actor: Option<String>,
/// Web URL associated with the event, when known.
pub url: Option<String>,
/// `true` when the owning entity was a seed entity, `false` when it was reached by expansion.
pub is_seed: bool,
}
/// Two events are equal when timestamp, entity id and event-type variant all
/// match. Payloads, summary, actor and the remaining fields are not compared.
impl PartialEq for TimelineEvent {
    fn eq(&self, other: &Self) -> bool {
        let identity = |event: &Self| {
            (
                event.timestamp,
                event.entity_id,
                event.event_type_discriminant(),
            )
        };
        identity(self) == identity(other)
    }
}
// `eq` compares only integer keys (two `i64`s and a `u8`), so it is a full equivalence relation.
impl Eq for TimelineEvent {}
/// Delegates to the total order from `Ord`, so the two orderings always agree.
impl PartialOrd for TimelineEvent {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Chronological order with a deterministic tiebreak: timestamp first, then
/// entity id, then the numeric rank of the event-type variant.
impl Ord for TimelineEvent {
    fn cmp(&self, other: &Self) -> Ordering {
        // Tuples compare lexicographically, which is exactly the tiebreak chain we want.
        let sort_key = |event: &Self| {
            (
                event.timestamp,
                event.entity_id,
                event.event_type_discriminant(),
            )
        };
        sort_key(self).cmp(&sort_key(other))
    }
}
impl TimelineEvent {
fn event_type_discriminant(&self) -> u8 {
match &self.event_type {
TimelineEventType::Created => 0,
TimelineEventType::StateChanged { .. } => 1,
TimelineEventType::LabelAdded { .. } => 2,
TimelineEventType::LabelRemoved { .. } => 3,
TimelineEventType::MilestoneSet { .. } => 4,
TimelineEventType::MilestoneRemoved { .. } => 5,
TimelineEventType::Merged => 6,
TimelineEventType::NoteEvidence { .. } => 7,
TimelineEventType::CrossReferenced { .. } => 8,
}
}
}
/// Per spec Section 3.3. Serde tagged enum for JSON output.
///
/// Serialized as an internally tagged object: the variant name goes into a
/// `"kind"` field in snake_case (e.g. `"state_changed"`) next to the variant's
/// own fields.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum TimelineEventType {
/// The entity was created.
Created,
/// The entity moved to a new state (e.g. `"closed"`, `"reopened"`).
StateChanged {
/// The state the entity changed to.
state: String,
},
/// A label was added to the entity.
LabelAdded {
/// Name of the label.
label: String,
},
/// A label was removed from the entity.
LabelRemoved {
/// Name of the label.
label: String,
},
/// A milestone was set on the entity.
MilestoneSet {
/// Title of the milestone.
milestone: String,
},
/// A milestone was removed from the entity.
MilestoneRemoved {
/// Title of the milestone.
milestone: String,
},
/// A merge request was merged.
Merged,
/// A note surfaced as evidence for the timeline.
NoteEvidence {
/// Id of the note.
note_id: i64,
/// Excerpt of the note text.
snippet: String,
/// Id of the discussion containing the note, when there is one.
discussion_id: Option<i64>,
},
/// The entity was cross-referenced.
/// NOTE(review): `target` looks like a display reference such as `"!567"` — confirm against the producer.
CrossReferenced {
/// The referenced target.
target: String,
},
}
/// Internal entity reference used across pipeline stages.
#[derive(Debug, Clone, Serialize)]
pub struct EntityRef {
/// Kind of entity; the pipeline recognises `"issue"` and `"merge_request"`.
pub entity_type: String,
/// Local database row id in the table that matches `entity_type`.
pub entity_id: i64,
/// The entity's `iid`.
pub entity_iid: i64,
/// Path of the project the entity belongs to.
pub project_path: String,
}
/// An entity discovered via BFS expansion.
/// Spec ref: Section 3.5 "expanded_entities" JSON structure.
#[derive(Debug, Clone, Serialize)]
pub struct ExpandedEntityRef {
/// The entity that was discovered.
pub entity_ref: EntityRef,
/// Number of reference hops from a seed entity (direct neighbours of a seed have depth 1).
pub depth: u32,
/// The entity from which this one was first reached.
pub via_from: EntityRef,
/// `reference_type` of the reference that led here (e.g. `"closes"`, `"related"`, `"mentioned"`).
pub via_reference_type: String,
/// `source_method` recorded on the reference that led here.
pub via_source_method: String,
}
/// Reference to an unsynced external entity.
/// Spec ref: Section 3.5 "unresolved_references" JSON structure.
#[derive(Debug, Clone, Serialize)]
pub struct UnresolvedRef {
/// The local entity that holds the outgoing reference.
pub source: EntityRef,
/// Project path of the target, when it was recorded on the reference.
pub target_project: Option<String>,
/// Entity type of the target.
pub target_type: String,
/// `iid` of the target; the expand phase stores `0` when the reference row has no iid.
pub target_iid: i64,
/// `reference_type` of the reference row.
pub reference_type: String,
}
/// Complete result from the timeline pipeline.
#[derive(Debug, Clone, Serialize)]
pub struct TimelineResult {
/// NOTE(review): presumably the search query that produced this timeline — confirm against the caller.
pub query: String,
/// The timeline events.
pub events: Vec<TimelineEvent>,
/// Entities the timeline was seeded from.
pub seed_entities: Vec<EntityRef>,
/// Entities reached by cross-reference expansion, with provenance.
pub expanded_entities: Vec<ExpandedEntityRef>,
/// References whose target entity is not available locally.
pub unresolved_references: Vec<UnresolvedRef>,
}
#[cfg(test)]
mod tests {
use super::*;
/// Build a seed event on issue iid 1 of `group/project`; only the sort-relevant
/// fields vary between tests.
fn make_event(timestamp: i64, entity_id: i64, event_type: TimelineEventType) -> TimelineEvent {
    let entity_type = String::from("issue");
    let project_path = String::from("group/project");
    let summary = String::from("test");
    TimelineEvent {
        timestamp,
        entity_type,
        entity_id,
        entity_iid: 1,
        project_path,
        event_type,
        summary,
        actor: None,
        url: None,
        is_seed: true,
    }
}
#[test]
fn test_timeline_event_sort_by_timestamp() {
    // Deliberately out of order on input.
    let mut events = vec![
        make_event(3000, 1, TimelineEventType::Created),
        make_event(1000, 2, TimelineEventType::Created),
        make_event(2000, 3, TimelineEventType::Merged),
    ];
    events.sort();

    let timestamps: Vec<i64> = events.iter().map(|event| event.timestamp).collect();
    assert_eq!(timestamps, vec![1000, 2000, 3000]);
}
#[test]
fn test_timeline_event_sort_tiebreak() {
    let mut events = vec![
        make_event(1000, 5, TimelineEventType::Created),
        make_event(1000, 2, TimelineEventType::Merged),
        make_event(1000, 2, TimelineEventType::Created),
    ];
    events.sort();

    // Identical timestamps: entity_id decides first, then the variant rank.
    let (first, second, third) = (&events[0], &events[1], &events[2]);
    assert_eq!(first.entity_id, 2);
    assert!(matches!(first.event_type, TimelineEventType::Created));
    assert_eq!(second.entity_id, 2);
    assert!(matches!(second.event_type, TimelineEventType::Merged));
    assert_eq!(third.entity_id, 5);
}
#[test]
fn test_timeline_event_type_serializes_tagged() {
    let state = String::from("closed");
    let serialized = serde_json::to_value(&TimelineEventType::StateChanged { state }).unwrap();

    // The variant name lands in "kind" (snake_case); payload fields sit beside it.
    assert_eq!(serialized["kind"], "state_changed");
    assert_eq!(serialized["state"], "closed");
}
#[test]
fn test_note_evidence_has_note_id() {
    let evidence = TimelineEventType::NoteEvidence {
        note_id: 42,
        snippet: String::from("some text"),
        discussion_id: Some(7),
    };
    let serialized = serde_json::to_value(&evidence).unwrap();

    assert_eq!(serialized["kind"], "note_evidence");
    assert_eq!(serialized["note_id"], 42);
    assert_eq!(serialized["snippet"], "some text");
    assert_eq!(serialized["discussion_id"], 7);
}
#[test]
fn test_entity_id_skipped_in_serialization() {
    let serialized =
        serde_json::to_value(&make_event(1000, 99, TimelineEventType::Created)).unwrap();

    // The internal row id must not leak into JSON; the iid must still be there.
    assert!(serialized.get("entity_id").is_none());
    assert_eq!(serialized["entity_iid"], 1);
}
#[test]
fn test_timeline_event_type_variant_count() {
    // One value per variant; every one of the 9 must serialize without panicking.
    let bug = || String::from("bug");
    let v1 = || String::from("v1");
    let variants: Vec<TimelineEventType> = vec![
        TimelineEventType::Created,
        TimelineEventType::StateChanged {
            state: String::from("closed"),
        },
        TimelineEventType::LabelAdded { label: bug() },
        TimelineEventType::LabelRemoved { label: bug() },
        TimelineEventType::MilestoneSet { milestone: v1() },
        TimelineEventType::MilestoneRemoved { milestone: v1() },
        TimelineEventType::Merged,
        TimelineEventType::NoteEvidence {
            note_id: 1,
            snippet: String::from("text"),
            discussion_id: None,
        },
        TimelineEventType::CrossReferenced {
            target: String::from("!567"),
        },
    ];

    assert_eq!(variants.len(), 9);
    variants.iter().for_each(|variant| {
        serde_json::to_value(variant).unwrap();
    });
}
}

View File

@@ -0,0 +1,687 @@
use rusqlite::Connection;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, ExpandedEntityRef, TimelineEvent, TimelineEventType};
/// Gather every event for the seed and expanded entities and return them as
/// one chronological list.
///
/// Steps 4-5 of the timeline pipeline:
/// 1. Collect Created, StateChanged, Label, Milestone and Merged events per
///    entity (seeds first, then expanded entities).
/// 2. Append the evidence notes produced by the seed phase.
/// 3. Sort using `TimelineEvent`'s `Ord` impl (stable sort).
/// 4. Drop events older than `since_ms` (when given), then keep at most
///    `limit` events from the start of the sorted list.
///
/// # Errors
/// Propagates the first error reported by a per-entity collector.
pub fn collect_events(
    conn: &Connection,
    seed_entities: &[EntityRef],
    expanded_entities: &[ExpandedEntityRef],
    evidence_notes: &[TimelineEvent],
    since_ms: Option<i64>,
    limit: usize,
) -> Result<Vec<TimelineEvent>> {
    let mut timeline: Vec<TimelineEvent> = Vec::new();

    // Seeds are flagged `is_seed = true`, entities reached by expansion `false`.
    let seeds = seed_entities.iter().map(|entity| (entity, true));
    let reached = expanded_entities
        .iter()
        .map(|expanded| (&expanded.entity_ref, false));
    for (entity, is_seed) in seeds.chain(reached) {
        collect_entity_events(conn, entity, is_seed, &mut timeline)?;
    }

    timeline.extend_from_slice(evidence_notes);
    timeline.sort();

    if let Some(cutoff) = since_ms {
        timeline.retain(|event| event.timestamp >= cutoff);
    }
    timeline.truncate(limit);

    Ok(timeline)
}
/// Collect all events for a single entity.
fn collect_entity_events(
conn: &Connection,
entity: &EntityRef,
is_seed: bool,
events: &mut Vec<TimelineEvent>,
) -> Result<()> {
collect_creation_event(conn, entity, is_seed, events)?;
collect_state_events(conn, entity, is_seed, events)?;
collect_label_events(conn, entity, is_seed, events)?;
collect_milestone_events(conn, entity, is_seed, events)?;
collect_merged_event(conn, entity, is_seed, events)?;
Ok(())
}
/// Collect the Created event from the entity's own table.
fn collect_creation_event(
conn: &Connection,
entity: &EntityRef,
is_seed: bool,
events: &mut Vec<TimelineEvent>,
) -> Result<()> {
let table = match entity.entity_type.as_str() {
"issue" => "issues",
"merge_request" => "merge_requests",
_ => return Ok(()),
};
let sql =
format!("SELECT created_at, author_username, title, web_url FROM {table} WHERE id = ?1");
let result = conn.query_row(&sql, rusqlite::params![entity.entity_id], |row| {
Ok((
row.get::<_, Option<i64>>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
))
});
if let Ok((Some(created_at), author, title, url)) = result {
let type_label = if entity.entity_type == "issue" {
"Issue"
} else {
"MR"
};
let title_str = title.as_deref().unwrap_or("(untitled)");
events.push(TimelineEvent {
timestamp: created_at,
entity_type: entity.entity_type.clone(),
entity_id: entity.entity_id,
entity_iid: entity.entity_iid,
project_path: entity.project_path.clone(),
event_type: TimelineEventType::Created,
summary: format!("{type_label} #{} created: {title_str}", entity.entity_iid),
actor: author,
url,
is_seed,
});
}
Ok(())
}
/// Push one `StateChanged` event per row of `resource_state_events` for this
/// entity, oldest first.
///
/// Rows with state `merged` are skipped here: `collect_merged_event` reports
/// them as a `Merged` event instead.
///
/// # Errors
/// Propagates statement preparation, query and row decoding errors.
fn collect_state_events(
    conn: &Connection,
    entity: &EntityRef,
    is_seed: bool,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let (id_col, id_val) = entity_id_column(entity);
    let sql = format!(
        "SELECT state, actor_username, created_at FROM resource_state_events\n\
         WHERE {id_col} = ?1\n\
         ORDER BY created_at ASC"
    );
    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(rusqlite::params![id_val], |row| {
        let state: String = row.get(0)?;
        let actor: Option<String> = row.get(1)?;
        let created_at: i64 = row.get(2)?;
        Ok((state, actor, created_at))
    })?;

    for row in rows {
        let (state, actor, timestamp) = row?;
        if state == "merged" {
            continue;
        }
        // Build the summary first so `state` can then move into the event.
        let summary = format!("State changed to {state}");
        events.push(TimelineEvent {
            timestamp,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
            event_type: TimelineEventType::StateChanged { state },
            summary,
            actor,
            url: None,
            is_seed,
        });
    }
    Ok(())
}
/// Push a `LabelAdded` / `LabelRemoved` event per row of
/// `resource_label_events` for this entity, oldest first.
///
/// A NULL label name is reported as `[deleted label]`. Rows whose action is
/// neither `add` nor `remove` are ignored.
///
/// # Errors
/// Propagates statement preparation, query and row decoding errors.
fn collect_label_events(
    conn: &Connection,
    entity: &EntityRef,
    is_seed: bool,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let (id_col, id_val) = entity_id_column(entity);
    let sql = format!(
        "SELECT action, label_name, actor_username, created_at FROM resource_label_events\n\
         WHERE {id_col} = ?1\n\
         ORDER BY created_at ASC"
    );
    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(rusqlite::params![id_val], |row| {
        let action: String = row.get(0)?;
        let label_name: Option<String> = row.get(1)?;
        let actor: Option<String> = row.get(2)?;
        let created_at: i64 = row.get(3)?;
        Ok((action, label_name, actor, created_at))
    })?;

    for row in rows {
        let (action, label_name, actor, timestamp) = row?;
        let label = label_name.unwrap_or_else(|| "[deleted label]".to_owned());
        // The summary is formatted before `label` moves into the variant.
        let (summary, event_type) = if action == "add" {
            (
                format!("Label added: {label}"),
                TimelineEventType::LabelAdded { label },
            )
        } else if action == "remove" {
            (
                format!("Label removed: {label}"),
                TimelineEventType::LabelRemoved { label },
            )
        } else {
            continue;
        };
        events.push(TimelineEvent {
            timestamp,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
            event_type,
            summary,
            actor,
            url: None,
            is_seed,
        });
    }
    Ok(())
}
/// Push a `MilestoneSet` / `MilestoneRemoved` event per row of
/// `resource_milestone_events` for this entity, oldest first.
///
/// A NULL milestone title is reported as `[deleted milestone]`. Rows whose
/// action is neither `add` nor `remove` are ignored.
///
/// # Errors
/// Propagates statement preparation, query and row decoding errors.
fn collect_milestone_events(
    conn: &Connection,
    entity: &EntityRef,
    is_seed: bool,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let (id_col, id_val) = entity_id_column(entity);
    let sql = format!(
        "SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events\n\
         WHERE {id_col} = ?1\n\
         ORDER BY created_at ASC"
    );
    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(rusqlite::params![id_val], |row| {
        let action: String = row.get(0)?;
        let milestone_title: Option<String> = row.get(1)?;
        let actor: Option<String> = row.get(2)?;
        let created_at: i64 = row.get(3)?;
        Ok((action, milestone_title, actor, created_at))
    })?;

    for row in rows {
        let (action, milestone_title, actor, timestamp) = row?;
        let milestone = milestone_title.unwrap_or_else(|| "[deleted milestone]".to_owned());
        // The summary is formatted before `milestone` moves into the variant.
        let (summary, event_type) = if action == "add" {
            (
                format!("Milestone set: {milestone}"),
                TimelineEventType::MilestoneSet { milestone },
            )
        } else if action == "remove" {
            (
                format!("Milestone removed: {milestone}"),
                TimelineEventType::MilestoneRemoved { milestone },
            )
        } else {
            continue;
        };
        events.push(TimelineEvent {
            timestamp,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
            event_type,
            summary,
            actor,
            url: None,
            is_seed,
        });
    }
    Ok(())
}
/// Collect Merged event for MRs. Prefers merged_at from the MR table.
/// Falls back to resource_state_events WHERE state='merged' if merged_at is NULL.
fn collect_merged_event(
conn: &Connection,
entity: &EntityRef,
is_seed: bool,
events: &mut Vec<TimelineEvent>,
) -> Result<()> {
if entity.entity_type != "merge_request" {
return Ok(());
}
// Try merged_at from merge_requests table first
let mr_result = conn.query_row(
"SELECT merged_at, merge_user_username, web_url FROM merge_requests WHERE id = ?1",
rusqlite::params![entity.entity_id],
|row| {
Ok((
row.get::<_, Option<i64>>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
if let Ok((Some(merged_at), merge_user, url)) = mr_result {
events.push(TimelineEvent {
timestamp: merged_at,
entity_type: entity.entity_type.clone(),
entity_id: entity.entity_id,
entity_iid: entity.entity_iid,
project_path: entity.project_path.clone(),
event_type: TimelineEventType::Merged,
summary: format!("MR !{} merged", entity.entity_iid),
actor: merge_user,
url,
is_seed,
});
return Ok(());
}
// Fallback: check resource_state_events for state='merged'
let fallback_result = conn.query_row(
"SELECT actor_username, created_at FROM resource_state_events
WHERE merge_request_id = ?1 AND state = 'merged'
ORDER BY created_at DESC LIMIT 1",
rusqlite::params![entity.entity_id],
|row| Ok((row.get::<_, Option<String>>(0)?, row.get::<_, i64>(1)?)),
);
if let Ok((actor, created_at)) = fallback_result {
events.push(TimelineEvent {
timestamp: created_at,
entity_type: entity.entity_type.clone(),
entity_id: entity.entity_id,
entity_iid: entity.entity_iid,
project_path: entity.project_path.clone(),
event_type: TimelineEventType::Merged,
summary: format!("MR !{} merged", entity.entity_iid),
actor,
url: None,
is_seed,
});
}
Ok(())
}
/// Pick the foreign-key column of the resource event tables that matches the
/// entity, paired with the id to bind against it.
///
/// Merge requests use `merge_request_id`; issues, and any unexpected entity
/// type, use `issue_id`.
fn entity_id_column(entity: &EntityRef) -> (&'static str, i64) {
    let column = if entity.entity_type == "merge_request" {
        "merge_request_id"
    } else {
        // "issue", plus anything unexpected (shouldn't happen).
        "issue_id"
    };
    (column, entity.entity_id)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Open a fresh in-memory database with all migrations applied.
fn setup_test_db() -> Connection {
    let in_memory = Path::new(":memory:");
    let db = create_connection(in_memory).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Insert the single `group/project` fixture project and return its row id.
fn insert_project(conn: &Connection) -> i64 {
    let sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')";
    conn.execute(sql, []).unwrap();
    conn.last_insert_rowid()
}
/// Insert an open issue created at 1000 by `alice` and return its row id.
/// The gitlab id is derived from the iid (`iid * 100`).
fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, ?3, 'Auth bug', 'opened', 'alice', 1000, 2000, 3000, 'https://gitlab.com/group/project/-/issues/1')";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Insert a merged MR created at 1000 by `bob` (merge user `charlie`) and
/// return its row id. `merged_at` may be `None` to exercise the fallback path.
fn insert_mr(conn: &Connection, project_id: i64, iid: i64, merged_at: Option<i64>) -> i64 {
    let sql = "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, merged_at, merge_user_username, web_url) VALUES (?1, ?2, ?3, 'Fix auth', 'merged', 'bob', 1000, 5000, 6000, ?4, 'charlie', 'https://gitlab.com/group/project/-/merge_requests/10')";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid, merged_at])
        .unwrap();
    conn.last_insert_rowid()
}
/// Build an `EntityRef` in the `group/project` fixture project.
fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef {
    let entity_type = String::from(entity_type);
    let project_path = String::from("group/project");
    EntityRef {
        entity_type,
        entity_id,
        entity_iid: iid,
        project_path,
    }
}
/// Insert one `resource_state_events` row, acted by `alice`, attached to the
/// given issue or merge request.
///
/// Each call uses a fresh `gitlab_id` from a process-wide counter. This keeps
/// the fixture deterministic and collision-free, where a random id could in
/// principle repeat and make a test fail intermittently.
fn insert_state_event(
    conn: &Connection,
    project_id: i64,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
    state: &str,
    created_at: i64,
) {
    // Shared across all tests in the process; atomic because tests run in parallel.
    static NEXT_GITLAB_ID: std::sync::atomic::AtomicI64 = std::sync::atomic::AtomicI64::new(1);
    let gitlab_id: i64 = NEXT_GITLAB_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    conn.execute(
        "INSERT INTO resource_state_events (gitlab_id, project_id, issue_id, merge_request_id, state, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, 'alice', ?6)",
        rusqlite::params![gitlab_id, project_id, issue_id, mr_id, state, created_at],
    )
    .unwrap();
}
/// Insert one `resource_label_events` row, acted by `alice`, attached to the
/// given issue or merge request. `label_name` may be `None` to simulate a
/// deleted label.
///
/// Each call uses a fresh `gitlab_id` from a process-wide counter. This keeps
/// the fixture deterministic and collision-free, where a random id could in
/// principle repeat and make a test fail intermittently.
fn insert_label_event(
    conn: &Connection,
    project_id: i64,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
    action: &str,
    label_name: Option<&str>,
    created_at: i64,
) {
    // Shared across all tests in the process; atomic because tests run in parallel.
    static NEXT_GITLAB_ID: std::sync::atomic::AtomicI64 = std::sync::atomic::AtomicI64::new(1);
    let gitlab_id: i64 = NEXT_GITLAB_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    conn.execute(
        "INSERT INTO resource_label_events (gitlab_id, project_id, issue_id, merge_request_id, action, label_name, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, 'alice', ?7)",
        rusqlite::params![gitlab_id, project_id, issue_id, mr_id, action, label_name, created_at],
    )
    .unwrap();
}
/// Insert one `resource_milestone_events` row, acted by `alice`, attached to
/// the given issue or merge request. `milestone_title` may be `None` to
/// simulate a deleted milestone.
///
/// Each call uses a fresh `gitlab_id` from a process-wide counter. This keeps
/// the fixture deterministic and collision-free, where a random id could in
/// principle repeat and make a test fail intermittently.
fn insert_milestone_event(
    conn: &Connection,
    project_id: i64,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
    action: &str,
    milestone_title: Option<&str>,
    created_at: i64,
) {
    // Shared across all tests in the process; atomic because tests run in parallel.
    static NEXT_GITLAB_ID: std::sync::atomic::AtomicI64 = std::sync::atomic::AtomicI64::new(1);
    let gitlab_id: i64 = NEXT_GITLAB_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    conn.execute(
        "INSERT INTO resource_milestone_events (gitlab_id, project_id, issue_id, merge_request_id, action, milestone_title, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, 'alice', ?7)",
        rusqlite::params![gitlab_id, project_id, issue_id, mr_id, action, milestone_title, created_at],
    )
    .unwrap();
}
#[test]
fn test_collect_creation_event() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    // A bare issue yields exactly its Created event.
    assert_eq!(events.len(), 1);
    let created = &events[0];
    assert!(matches!(created.event_type, TimelineEventType::Created));
    assert_eq!(created.timestamp, 1000);
    assert_eq!(created.actor.as_deref(), Some("alice"));
    assert!(created.is_seed);
}
#[test]
fn test_collect_state_events() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    for (state, at) in [("closed", 3000), ("reopened", 4000)] {
        insert_state_event(&conn, project_id, Some(issue_id), None, state, at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    // Created followed by the two state changes, in chronological order.
    assert_eq!(events.len(), 3);
    assert!(matches!(events[0].event_type, TimelineEventType::Created));
    let is_change_to = |event: &TimelineEvent, expected: &str| {
        matches!(&event.event_type, TimelineEventType::StateChanged { state } if state == expected)
    };
    assert!(is_change_to(&events[1], "closed"));
    assert!(is_change_to(&events[2], "reopened"));
}
#[test]
fn test_collect_merged_dedup() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let mr_id = insert_mr(&conn, project_id, 10, Some(5000));
    // A 'merged' state event also exists; it must NOT surface as StateChanged.
    insert_state_event(&conn, project_id, None, Some(mr_id), "merged", 5000);

    let seeds = [make_entity_ref("merge_request", mr_id, 10)];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    // Expect Created + Merged, never Created + StateChanged{merged} + Merged.
    let mut merged_count = 0;
    let mut state_merged_count = 0;
    for event in &events {
        match &event.event_type {
            TimelineEventType::Merged => merged_count += 1,
            TimelineEventType::StateChanged { state } if state == "merged" => {
                state_merged_count += 1;
            }
            _ => {}
        }
    }
    assert_eq!(merged_count, 1);
    assert_eq!(state_merged_count, 0);
}
#[test]
fn test_collect_null_label_fallback() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    // NULL label name simulates a label that has since been deleted.
    insert_label_event(&conn, project_id, Some(issue_id), None, "add", None, 2000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    let has_placeholder = events.iter().any(|event| {
        matches!(&event.event_type, TimelineEventType::LabelAdded { label } if label == "[deleted label]")
    });
    assert!(has_placeholder);
}
#[test]
fn test_collect_null_milestone_fallback() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    // NULL milestone title simulates a milestone that has since been deleted.
    insert_milestone_event(&conn, project_id, Some(issue_id), None, "add", None, 2000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    let has_placeholder = events.iter().any(|event| {
        matches!(&event.event_type, TimelineEventType::MilestoneSet { milestone } if milestone == "[deleted milestone]")
    });
    assert!(has_placeholder);
}
#[test]
fn test_collect_since_filter() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    for (state, at) in [("closed", 3000), ("reopened", 5000)] {
        insert_state_event(&conn, project_id, Some(issue_id), None, state, at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    // Cutoff 4000 drops Created (1000) and closed (3000); only reopened (5000) remains.
    let events = collect_events(&conn, &seeds, &[], &[], Some(4000), 100).unwrap();

    let timestamps: Vec<i64> = events.iter().map(|event| event.timestamp).collect();
    assert_eq!(timestamps.len(), 1);
    assert_eq!(timestamps[0], 5000);
}
#[test]
fn test_collect_chronological_sort() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let mr_id = insert_mr(&conn, project_id, 10, Some(4000));
    insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 3000);
    insert_label_event(&conn, project_id, None, Some(mr_id), "add", Some("bug"), 2000);

    let seeds = [
        make_entity_ref("issue", issue_id, 1),
        make_entity_ref("merge_request", mr_id, 10),
    ];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    // Events from both entities must interleave in non-decreasing time order.
    for (earlier, later) in events.iter().zip(events.iter().skip(1)) {
        assert!(earlier.timestamp <= later.timestamp);
    }
}
#[test]
fn test_collect_respects_limit() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    // Twenty state events, 100 apart, starting at 3000.
    for created_at in (0..20).map(|step| 3000 + step * 100) {
        insert_state_event(&conn, project_id, Some(issue_id), None, "closed", created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let limited = collect_events(&conn, &seeds, &[], &[], None, 5).unwrap();

    assert_eq!(limited.len(), 5);
}
#[test]
fn test_collect_evidence_notes_included() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);

    let note = TimelineEvent {
        timestamp: 2500,
        entity_type: String::from("issue"),
        entity_id: issue_id,
        entity_iid: 1,
        project_path: String::from("group/project"),
        event_type: TimelineEventType::NoteEvidence {
            note_id: 42,
            snippet: String::from("relevant note"),
            discussion_id: Some(1),
        },
        summary: String::from("Note by alice"),
        actor: Some(String::from("alice")),
        url: None,
        is_seed: true,
    };
    let evidence = [note];
    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let events = collect_events(&conn, &seeds, &[], &evidence, None, 100).unwrap();

    // The evidence note handed in must come back out as part of the timeline.
    let found = events.iter().any(|event| {
        matches!(
            &event.event_type,
            TimelineEventType::NoteEvidence { note_id, .. } if *note_id == 42
        )
    });
    assert!(found);
}
#[test]
fn test_collect_merged_fallback_to_state_event() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    // merged_at is NULL on the MR row...
    let mr_id = insert_mr(&conn, project_id, 10, None);
    // ...but a 'merged' state event records when it happened.
    insert_state_event(&conn, project_id, None, Some(mr_id), "merged", 5000);

    let seeds = [make_entity_ref("merge_request", mr_id, 10)];
    let events = collect_events(&conn, &seeds, &[], &[], None, 100).unwrap();

    let merged_at = events
        .iter()
        .find(|event| matches!(event.event_type, TimelineEventType::Merged))
        .map(|event| event.timestamp);
    assert!(merged_at.is_some());
    assert_eq!(merged_at.unwrap(), 5000);
}
}

592
src/core/timeline_expand.rs Normal file
View File

@@ -0,0 +1,592 @@
use std::collections::{HashSet, VecDeque};
use rusqlite::Connection;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef};
/// Result of the expand phase.
pub struct ExpandResult {
/// Entities reached from the seeds, in discovery order, each with the hop that led to it.
pub expanded_entities: Vec<ExpandedEntityRef>,
/// Outgoing references whose target has no local entity id.
pub unresolved_references: Vec<UnresolvedRef>,
}
/// Run the EXPAND phase of the timeline pipeline (BFS over entity_references).
///
/// Starting from seed entities, traverses cross-references (both outgoing and incoming)
/// to discover related entities. Collects provenance (who referenced whom, how).
pub fn expand_timeline(
conn: &Connection,
seeds: &[EntityRef],
depth: u32,
include_mentions: bool,
max_entities: usize,
) -> Result<ExpandResult> {
if depth == 0 || seeds.is_empty() {
return Ok(ExpandResult {
expanded_entities: Vec::new(),
unresolved_references: Vec::new(),
});
}
let edge_types = if include_mentions {
vec!["closes", "related", "mentioned"]
} else {
vec!["closes", "related"]
};
let mut visited: HashSet<(String, i64)> = seeds
.iter()
.map(|s| (s.entity_type.clone(), s.entity_id))
.collect();
let mut queue: VecDeque<(EntityRef, u32)> = seeds.iter().map(|s| (s.clone(), 0)).collect();
let mut expanded = Vec::new();
let mut unresolved = Vec::new();
while let Some((current, current_depth)) = queue.pop_front() {
if expanded.len() >= max_entities {
break;
}
let neighbors = find_neighbors(conn, &current, &edge_types)?;
for neighbor in neighbors {
match neighbor {
Neighbor::Resolved {
entity_ref,
reference_type,
source_method,
} => {
let key = (entity_ref.entity_type.clone(), entity_ref.entity_id);
if !visited.insert(key) {
continue;
}
expanded.push(ExpandedEntityRef {
entity_ref: entity_ref.clone(),
depth: current_depth + 1,
via_from: current.clone(),
via_reference_type: reference_type,
via_source_method: source_method,
});
if expanded.len() >= max_entities {
break;
}
if current_depth + 1 < depth {
queue.push_back((entity_ref, current_depth + 1));
}
}
Neighbor::Unresolved(unresolved_ref) => {
unresolved.push(unresolved_ref);
}
}
}
}
Ok(ExpandResult {
expanded_entities: expanded,
unresolved_references: unresolved,
})
}
/// One edge found while looking up an entity's references.
enum Neighbor {
/// The other end of the reference exists locally.
Resolved {
/// The entity at the other end of the reference.
entity_ref: EntityRef,
/// `reference_type` column of the reference row.
reference_type: String,
/// `source_method` column of the reference row.
source_method: String,
},
/// An outgoing reference whose target has no local entity id.
Unresolved(UnresolvedRef),
}
/// Look up every reference that touches `entity` in `entity_references`,
/// restricted to the given reference types.
///
/// Outgoing references (entity is the source) come first in the result,
/// followed by incoming ones (entity is the target).
fn find_neighbors(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
) -> Result<Vec<Neighbor>> {
    let mut found: Vec<Neighbor> = Vec::new();
    find_outgoing(conn, entity, edge_types, &mut found)?;
    find_incoming(conn, entity, edge_types, &mut found)?;
    Ok(found)
}
/// Append the outgoing references of `entity` (entity is the source) to
/// `neighbors`.
///
/// A reference with a local target id becomes `Neighbor::Resolved` when the
/// target can be resolved to a known entity, and is dropped otherwise. A
/// reference without a target id becomes `Neighbor::Unresolved`, with a target
/// iid of 0 when none was stored.
///
/// # Errors
/// Propagates statement preparation, query, row decoding and resolution errors.
fn find_outgoing(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
    neighbors: &mut Vec<Neighbor>,
) -> Result<()> {
    // ?1 and ?2 are taken by the source entity; reference types start at ?3.
    let placeholders = (0..edge_types.len())
        .map(|offset| format!("?{}", offset + 3))
        .collect::<Vec<_>>()
        .join(", ");
    let sql = format!(
        "SELECT target_entity_type, target_entity_id, target_project_path, target_entity_iid,\n\
         reference_type, source_method\n\
         FROM entity_references\n\
         WHERE source_entity_type = ?1\n\
         AND source_entity_id = ?2\n\
         AND reference_type IN ({placeholders})"
    );

    let mut bound: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
    bound.push(&entity.entity_type);
    bound.push(&entity.entity_id);
    for edge_type in edge_types {
        bound.push(edge_type);
    }

    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(bound.as_slice(), |row| {
        let target_type: String = row.get(0)?;
        let target_id: Option<i64> = row.get(1)?;
        let target_project_path: Option<String> = row.get(2)?;
        let target_iid: Option<i64> = row.get(3)?;
        let reference_type: String = row.get(4)?;
        let source_method: String = row.get(5)?;
        Ok((
            target_type,
            target_id,
            target_project_path,
            target_iid,
            reference_type,
            source_method,
        ))
    })?;

    for row in rows {
        let (target_type, target_id, target_project_path, target_iid, reference_type, source_method) =
            row?;
        if let Some(local_id) = target_id {
            if let Some(entity_ref) = resolve_entity_ref(conn, &target_type, local_id)? {
                neighbors.push(Neighbor::Resolved {
                    entity_ref,
                    reference_type,
                    source_method,
                });
            }
        } else {
            neighbors.push(Neighbor::Unresolved(UnresolvedRef {
                source: entity.clone(),
                target_project: target_project_path,
                target_type,
                target_iid: target_iid.unwrap_or(0),
                reference_type,
            }));
        }
    }
    Ok(())
}
/// Append the incoming references of `entity` (entity is the target) to
/// `neighbors`.
///
/// Every source that resolves to a known entity becomes `Neighbor::Resolved`;
/// sources that cannot be resolved are dropped.
///
/// # Errors
/// Propagates statement preparation, query, row decoding and resolution errors.
fn find_incoming(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
    neighbors: &mut Vec<Neighbor>,
) -> Result<()> {
    // ?1 and ?2 are taken by the target entity; reference types start at ?3.
    let placeholders = (0..edge_types.len())
        .map(|offset| format!("?{}", offset + 3))
        .collect::<Vec<_>>()
        .join(", ");
    let sql = format!(
        "SELECT source_entity_type, source_entity_id, reference_type, source_method\n\
         FROM entity_references\n\
         WHERE target_entity_type = ?1\n\
         AND target_entity_id = ?2\n\
         AND reference_type IN ({placeholders})"
    );

    let mut bound: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
    bound.push(&entity.entity_type);
    bound.push(&entity.entity_id);
    for edge_type in edge_types {
        bound.push(edge_type);
    }

    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(bound.as_slice(), |row| {
        let source_type: String = row.get(0)?;
        let source_id: i64 = row.get(1)?;
        let reference_type: String = row.get(2)?;
        let source_method: String = row.get(3)?;
        Ok((source_type, source_id, reference_type, source_method))
    })?;

    for row in rows {
        let (source_type, source_id, reference_type, source_method) = row?;
        let Some(entity_ref) = resolve_entity_ref(conn, &source_type, source_id)? else {
            continue;
        };
        neighbors.push(Neighbor::Resolved {
            entity_ref,
            reference_type,
            source_method,
        });
    }
    Ok(())
}
/// Resolve an entity ID to a full EntityRef with iid and project_path.
fn resolve_entity_ref(
conn: &Connection,
entity_type: &str,
entity_id: i64,
) -> Result<Option<EntityRef>> {
let table = match entity_type {
"issue" => "issues",
"merge_request" => "merge_requests",
_ => return Ok(None),
};
let sql = format!(
"SELECT e.iid, p.path_with_namespace
FROM {table} e
JOIN projects p ON p.id = e.project_id
WHERE e.id = ?1"
);
let result = conn.query_row(&sql, rusqlite::params![entity_id], |row| {
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
});
match result {
Ok((iid, project_path)) => Ok(Some(EntityRef {
entity_type: entity_type.to_owned(),
entity_id,
entity_iid: iid,
project_path,
})),
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(e) => Err(e.into()),
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::db::{create_connection, run_migrations};
    use std::path::Path;

    /// Open an in-memory database with every migration applied.
    fn setup_test_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        conn
    }

    /// Insert the fixture project `group/project` and return its row id.
    fn insert_project(conn: &Connection) -> i64 {
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')",
            [],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert an opened issue with the given iid and return its row id.
    fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
        conn.execute(
            "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test', 'opened', 'alice', 1000, 2000, 3000)",
            rusqlite::params![iid * 100, project_id, iid],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert an opened merge request with the given iid and return its row id.
    fn insert_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
        conn.execute(
            "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)",
            rusqlite::params![iid * 100, project_id, iid],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert one `entity_references` row. A `None` target id stores NULL.
    // One argument per column keeps call sites explicit, hence the lint allowance.
    #[allow(clippy::too_many_arguments)]
    fn insert_ref(
        conn: &Connection,
        project_id: i64,
        source_type: &str,
        source_id: i64,
        target_type: &str,
        target_id: Option<i64>,
        ref_type: &str,
        source_method: &str,
    ) {
        conn.execute(
            "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1000)",
            rusqlite::params![project_id, source_type, source_id, target_type, target_id, ref_type, source_method],
        )
        .unwrap();
    }

    /// Record that merge request `mr_id` closes issue `issue_id` (source method `api`).
    fn insert_closes_ref(conn: &Connection, project_id: i64, mr_id: i64, issue_id: i64) {
        insert_ref(
            conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );
    }

    /// Build an `EntityRef` that lives in the fixture project.
    fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef {
        EntityRef {
            entity_type: entity_type.to_owned(),
            entity_id,
            entity_iid: iid,
            project_path: "group/project".to_owned(),
        }
    }

    #[test]
    fn test_expand_depth_zero() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 0, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
        assert!(result.unresolved_references.is_empty());
    }

    #[test]
    fn test_expand_finds_linked_entity() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);
        // MR closes issue
        insert_closes_ref(&conn, project_id, mr_id, issue_id);

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert_eq!(result.expanded_entities.len(), 1);
        assert_eq!(
            result.expanded_entities[0].entity_ref.entity_type,
            "merge_request"
        );
        assert_eq!(result.expanded_entities[0].entity_ref.entity_iid, 10);
        assert_eq!(result.expanded_entities[0].depth, 1);
    }

    #[test]
    fn test_expand_bidirectional() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);
        // MR closes issue (MR is source, issue is target)
        insert_closes_ref(&conn, project_id, mr_id, issue_id);

        // Starting from MR should find the issue (outgoing)
        let from_mr = vec![make_entity_ref("merge_request", mr_id, 10)];
        let outgoing = expand_timeline(&conn, &from_mr, 1, false, 100).unwrap();
        assert_eq!(outgoing.expanded_entities.len(), 1);
        assert_eq!(outgoing.expanded_entities[0].entity_ref.entity_type, "issue");

        // Starting from the issue should find the MR (incoming)
        let from_issue = vec![make_entity_ref("issue", issue_id, 1)];
        let incoming = expand_timeline(&conn, &from_issue, 1, false, 100).unwrap();
        assert_eq!(incoming.expanded_entities.len(), 1);
        assert_eq!(
            incoming.expanded_entities[0].entity_ref.entity_type,
            "merge_request"
        );
    }

    #[test]
    fn test_expand_respects_max_entities() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        // Create 10 MRs that all close this issue
        for i in 2..=11 {
            let mr_id = insert_mr(&conn, project_id, i);
            insert_closes_ref(&conn, project_id, mr_id, issue_id);
        }

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 3).unwrap();

        assert!(result.expanded_entities.len() <= 3);
    }

    #[test]
    fn test_expand_skips_mentions_by_default() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);
        // MR mentions issue (should be skipped by default)
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "mentioned",
            "note_parse",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
    }

    #[test]
    fn test_expand_includes_mentions_when_flagged() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);
        // MR mentions issue
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "mentioned",
            "note_parse",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, true, 100).unwrap();

        assert_eq!(result.expanded_entities.len(), 1);
    }

    #[test]
    fn test_expand_collects_unresolved() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        // Unresolved cross-project reference
        conn.execute(
            "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at) VALUES (?1, 'issue', ?2, 'issue', NULL, 'other/repo', 42, 'closes', 'description_parse', 1000)",
            rusqlite::params![project_id, issue_id],
        )
        .unwrap();

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
        assert_eq!(result.unresolved_references.len(), 1);
        assert_eq!(
            result.unresolved_references[0].target_project,
            Some("other/repo".to_owned())
        );
        assert_eq!(result.unresolved_references[0].target_iid, 42);
    }

    #[test]
    fn test_expand_tracks_provenance() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);
        insert_closes_ref(&conn, project_id, mr_id, issue_id);

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert_eq!(result.expanded_entities.len(), 1);
        let expanded = &result.expanded_entities[0];
        assert_eq!(expanded.via_reference_type, "closes");
        assert_eq!(expanded.via_source_method, "api");
        assert_eq!(expanded.via_from.entity_type, "issue");
        assert_eq!(expanded.via_from.entity_id, issue_id);
    }

    #[test]
    fn test_expand_no_duplicates() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);
        // Two references from MR to same issue (different methods)
        insert_closes_ref(&conn, project_id, mr_id, issue_id);
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "related",
            "note_parse",
        );

        let seeds = vec![make_entity_ref("merge_request", mr_id, 10)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        // Should only appear once (first-come wins)
        assert_eq!(result.expanded_entities.len(), 1);
    }

    #[test]
    fn test_expand_empty_seeds() {
        let conn = setup_test_db();

        let result = expand_timeline(&conn, &[], 1, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
        assert!(result.unresolved_references.is_empty());
    }
}

573
src/core/timeline_seed.rs Normal file
View File

@@ -0,0 +1,573 @@
use std::collections::HashSet;
use rusqlite::Connection;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType};
use crate::search::{FtsQueryMode, to_fts_query};
/// Result of the seed + hydrate phases.
///
/// Produced by `seed_timeline`; both vectors are empty when the query yields no
/// usable FTS expression.
pub struct SeedResult {
    /// Deduplicated issues / merge requests matched by the keyword search, with
    /// discussion documents already resolved to their parent entity.
    pub seed_entities: Vec<EntityRef>,
    /// `NoteEvidence` timeline events built from non-system notes of matched
    /// discussion documents.
    pub evidence_notes: Vec<TimelineEvent>,
}
/// Run the SEED + HYDRATE phases of the timeline pipeline.
///
/// 1. SEED: FTS5 keyword search over documents -> matched document IDs
/// 2. HYDRATE: Map document IDs -> source entities + top matched notes as evidence
///
/// Discussion documents are resolved to their parent entity (issue or MR).
/// Entities are deduplicated. Evidence notes are capped at `max_evidence`.
///
/// When `query` converts to an empty FTS expression, no database work is done and an
/// empty `SeedResult` is returned.
///
/// # Errors
/// Propagates errors from the entity search or the evidence search.
pub fn seed_timeline(
    conn: &Connection,
    query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_seeds: usize,
    max_evidence: usize,
) -> Result<SeedResult> {
    let fts_query = to_fts_query(query, FtsQueryMode::Safe);

    let (seed_entities, evidence_notes) = if fts_query.is_empty() {
        (Vec::new(), Vec::new())
    } else {
        // Entities first, then evidence: the tuple is evaluated left to right.
        (
            find_seed_entities(conn, &fts_query, project_id, since_ms, max_seeds)?,
            find_evidence_notes(conn, &fts_query, project_id, since_ms, max_evidence)?,
        )
    };

    Ok(SeedResult {
        seed_entities,
        evidence_notes,
    })
}
/// Find seed entities via FTS5 search, resolving discussions to their parent entity.
///
/// Matching documents are visited in `rank` order. Issue and merge-request documents
/// map to themselves; discussion documents map to the issue or merge request they hang
/// off. Orphaned discussions and unknown document types are skipped. Every entity is
/// returned at most once and collection stops once `max_seeds` entities were gathered.
///
/// * `project_id` - when `Some`, only documents of that project are considered.
/// * `since_ms` - when `Some`, only documents with `updated_at >= since_ms` are considered.
/// * `max_seeds` - upper bound on returned entities; `0` returns an empty list.
///
/// # Errors
/// Propagates database errors from the search query and from entity resolution.
fn find_seed_entities(
    conn: &Connection,
    fts_query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_seeds: usize,
) -> Result<Vec<EntityRef>> {
    if max_seeds == 0 {
        return Ok(Vec::new());
    }

    // Fetch up to three documents per requested seed (presumably because several
    // documents can collapse into one entity). Saturate instead of wrapping: a negative
    // LIMIT means "no limit" to SQLite, which a plain `as i64` cast could produce.
    let fetch_limit = i64::try_from(max_seeds.saturating_mul(3)).unwrap_or(i64::MAX);

    let sql = "\n\
        SELECT d.source_type, d.source_id, d.project_id,\n\
        disc.issue_id, disc.merge_request_id\n\
        FROM documents_fts\n\
        JOIN documents d ON d.id = documents_fts.rowid\n\
        LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'\n\
        WHERE documents_fts MATCH ?1\n\
        AND (?2 IS NULL OR d.project_id = ?2)\n\
        AND (?3 IS NULL OR d.updated_at >= ?3)\n\
        ORDER BY rank\n\
        LIMIT ?4\n";
    let mut stmt = conn.prepare(sql)?;
    let rows = stmt.query_map(
        rusqlite::params![fts_query, project_id, since_ms, fetch_limit],
        |row| {
            Ok((
                row.get::<_, String>(0)?,      // document source_type
                row.get::<_, i64>(1)?,         // document source_id
                row.get::<_, i64>(2)?,         // document project_id
                row.get::<_, Option<i64>>(3)?, // parent issue id (discussion rows only)
                row.get::<_, Option<i64>>(4)?, // parent MR id (discussion rows only)
            ))
        },
    )?;

    let mut seen: HashSet<(&'static str, i64)> = HashSet::new();
    let mut entities = Vec::new();
    for row_result in rows {
        let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id) = row_result?;

        let (entity_type, entity_id) = match (source_type.as_str(), disc_issue_id, disc_mr_id) {
            ("issue", _, _) => ("issue", source_id),
            ("merge_request", _, _) => ("merge_request", source_id),
            // A discussion is represented by its parent; the issue wins if both are set.
            ("discussion", Some(issue_id), _) => ("issue", issue_id),
            ("discussion", None, Some(mr_id)) => ("merge_request", mr_id),
            // Orphaned discussion or unknown document type.
            _ => continue,
        };

        if !seen.insert((entity_type, entity_id)) {
            continue;
        }
        if let Some(entity_ref) = resolve_entity(conn, entity_type, entity_id, proj_id)? {
            entities.push(entity_ref);
            if entities.len() >= max_seeds {
                break;
            }
        }
    }
    Ok(entities)
}
/// Resolve an entity ID to a full `EntityRef` with iid and project path.
///
/// The lookup is scoped to `project_id`: a row with the right id but a different
/// project yields `Ok(None)`, as does an `entity_type` other than `"issue"` or
/// `"merge_request"`.
///
/// # Errors
/// Any database error other than "no rows returned" is propagated.
fn resolve_entity(
    conn: &Connection,
    entity_type: &str,
    entity_id: i64,
    project_id: i64,
) -> Result<Option<EntityRef>> {
    let table = match entity_type {
        "issue" => "issues",
        "merge_request" => "merge_requests",
        _ => return Ok(None),
    };
    let query = format!(
        "SELECT e.iid, p.path_with_namespace\n\
         FROM {table} e\n\
         JOIN projects p ON p.id = e.project_id\n\
         WHERE e.id = ?1 AND e.project_id = ?2"
    );

    // Build the EntityRef straight from the row; column 0 is the iid, column 1 the path.
    let lookup = conn.query_row(&query, rusqlite::params![entity_id, project_id], |row| {
        Ok(EntityRef {
            entity_type: entity_type.to_owned(),
            entity_id,
            entity_iid: row.get(0)?,
            project_path: row.get(1)?,
        })
    });

    match lookup {
        Ok(entity) => Ok(Some(entity)),
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(other) => Err(other.into()),
    }
}
/// Find evidence notes: non-system notes of FTS5-matched discussion documents.
///
/// Matching happens at document level, so every non-system note of a matched
/// discussion is a candidate. At most `max_evidence` rows are read, in `rank` order.
/// Each surviving row becomes a `NoteEvidence` event attached to the discussion's parent
/// issue or merge request, with the note body truncated to 200 characters.
///
/// Rows are skipped (not treated as errors) when the discussion has no parent entity
/// or when the parent cannot be resolved within the document's project.
///
/// * `project_id` - when `Some`, only documents of that project are considered.
/// * `since_ms` - when `Some`, only documents with `updated_at >= since_ms` are considered.
/// * `max_evidence` - upper bound on rows read; `0` returns an empty list.
///
/// # Errors
/// Propagates database errors from the search query and from entity resolution.
fn find_evidence_notes(
    conn: &Connection,
    fts_query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_evidence: usize,
) -> Result<Vec<TimelineEvent>> {
    if max_evidence == 0 {
        return Ok(Vec::new());
    }
    // Checked conversion: a wrapped, negative LIMIT would mean "no limit" to SQLite.
    let row_limit = i64::try_from(max_evidence).unwrap_or(i64::MAX);

    let sql = "\n\
        SELECT n.id AS note_id, n.body, n.created_at, n.author_username,\n\
        disc.id AS discussion_id,\n\
        CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END AS parent_type,\n\
        COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id,\n\
        d.project_id\n\
        FROM documents_fts\n\
        JOIN documents d ON d.id = documents_fts.rowid\n\
        JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'\n\
        JOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0\n\
        WHERE documents_fts MATCH ?1\n\
        AND (?2 IS NULL OR d.project_id = ?2)\n\
        AND (?3 IS NULL OR d.updated_at >= ?3)\n\
        ORDER BY rank\n\
        LIMIT ?4\n";
    let mut stmt = conn.prepare(sql)?;
    let rows = stmt.query_map(
        rusqlite::params![fts_query, project_id, since_ms, row_limit],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,            // note_id
                row.get::<_, Option<String>>(1)?, // body
                row.get::<_, i64>(2)?,            // created_at
                row.get::<_, Option<String>>(3)?, // author
                row.get::<_, i64>(4)?,            // discussion_id
                row.get::<_, String>(5)?,         // parent_type
                // NULL when the discussion has neither an issue nor an MR parent;
                // reading it as plain i64 would fail the whole query.
                row.get::<_, Option<i64>>(6)?, // parent_entity_id
                row.get::<_, i64>(7)?,         // project_id
            ))
        },
    )?;

    let mut events = Vec::new();
    for row_result in rows {
        let (
            note_id,
            body,
            created_at,
            author,
            discussion_id,
            parent_type,
            parent_entity_id,
            proj_id,
        ) = row_result?;

        // Orphaned discussion: nothing to attach the note to.
        let parent_entity_id = match parent_entity_id {
            Some(id) => id,
            None => continue,
        };
        let parent = match resolve_entity(conn, &parent_type, parent_entity_id, proj_id)? {
            Some(entity) => entity,
            None => continue,
        };
        let snippet = truncate_to_chars(body.as_deref().unwrap_or(""), 200);

        events.push(TimelineEvent {
            timestamp: created_at,
            entity_type: parent_type,
            entity_id: parent_entity_id,
            entity_iid: parent.entity_iid,
            project_path: parent.project_path,
            event_type: TimelineEventType::NoteEvidence {
                note_id,
                snippet,
                discussion_id: Some(discussion_id),
            },
            summary: format!("Note by {}", author.as_deref().unwrap_or("unknown")),
            actor: author,
            url: None,
            is_seed: true,
        });
    }
    Ok(events)
}
/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
///
/// Returns an owned copy of `s` when it already fits; otherwise the first `max_chars`
/// characters. Counting is by `char`, never by byte.
fn truncate_to_chars(s: &str, max_chars: usize) -> String {
    // The byte offset of character number `max_chars` (0-based) is exactly where the
    // kept prefix ends; if that character does not exist the whole string fits.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => s[..cut].to_owned(),
        None => s.to_owned(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::db::{create_connection, run_migrations};
    use std::path::Path;
    use std::sync::atomic::{AtomicI64, Ordering};

    /// Open an in-memory database with every migration applied.
    fn setup_test_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        conn
    }

    /// Hand out distinct ids for fixture columns that need a fresh value per row.
    ///
    /// A process-wide counter can never repeat a value, unlike random ids, and keeps
    /// the fixtures free of randomness.
    fn next_unique_id() -> i64 {
        static NEXT: AtomicI64 = AtomicI64::new(1);
        NEXT.fetch_add(1, Ordering::Relaxed)
    }

    /// Insert the fixture project `group/project` and return its row id.
    fn insert_test_project(conn: &Connection) -> i64 {
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')",
            [],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert an opened issue with the given iid and return its row id.
    fn insert_test_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
        conn.execute(
            "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test issue', 'opened', 'alice', 1000, 2000, 3000)",
            rusqlite::params![iid * 100, project_id, iid],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert an opened merge request with the given iid and return its row id.
    fn insert_test_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
        conn.execute(
            "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)",
            rusqlite::params![iid * 100, project_id, iid],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert a searchable document for the given source and return its row id.
    fn insert_document(
        conn: &Connection,
        source_type: &str,
        source_id: i64,
        project_id: i64,
        content: &str,
    ) -> i64 {
        conn.execute(
            "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES (?1, ?2, ?3, ?4, ?5)",
            rusqlite::params![source_type, source_id, project_id, content, format!("hash_{source_id}")],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert a discussion attached to an issue or a merge request; returns its row id.
    fn insert_discussion(
        conn: &Connection,
        project_id: i64,
        issue_id: Option<i64>,
        mr_id: Option<i64>,
    ) -> i64 {
        let noteable_type = if issue_id.is_some() {
            "Issue"
        } else {
            "MergeRequest"
        };
        conn.execute(
            "INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, 0)",
            rusqlite::params![format!("disc_{}", next_unique_id()), project_id, issue_id, mr_id, noteable_type],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert a note by `alice` into a discussion; returns its row id.
    fn insert_note(
        conn: &Connection,
        discussion_id: i64,
        project_id: i64,
        body: &str,
        is_system: bool,
    ) -> i64 {
        let gitlab_id = next_unique_id();
        conn.execute(
            "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, author_username, body, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, ?4, 'alice', ?5, 5000, 5000, 5000)",
            rusqlite::params![gitlab_id, discussion_id, project_id, i32::from(is_system), body],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    #[test]
    fn test_seed_empty_query_returns_empty() {
        let conn = setup_test_db();
        let result = seed_timeline(&conn, "", None, None, 50, 10).unwrap();
        assert!(result.seed_entities.is_empty());
        assert!(result.evidence_notes.is_empty());
    }

    #[test]
    fn test_seed_no_matches_returns_empty() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 1);
        insert_document(
            &conn,
            "issue",
            issue_id,
            project_id,
            "unrelated content here",
        );

        let result = seed_timeline(&conn, "nonexistent_xyzzy_query", None, None, 50, 10).unwrap();

        assert!(result.seed_entities.is_empty());
        assert!(result.evidence_notes.is_empty());
    }

    #[test]
    fn test_seed_finds_issue() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 42);
        insert_document(
            &conn,
            "issue",
            issue_id,
            project_id,
            "authentication error in login flow",
        );

        let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();

        assert_eq!(result.seed_entities.len(), 1);
        assert_eq!(result.seed_entities[0].entity_type, "issue");
        assert_eq!(result.seed_entities[0].entity_iid, 42);
        assert_eq!(result.seed_entities[0].project_path, "group/project");
    }

    #[test]
    fn test_seed_finds_mr() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let mr_id = insert_test_mr(&conn, project_id, 99);
        insert_document(
            &conn,
            "merge_request",
            mr_id,
            project_id,
            "fix authentication bug",
        );

        let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();

        assert_eq!(result.seed_entities.len(), 1);
        assert_eq!(result.seed_entities[0].entity_type, "merge_request");
        assert_eq!(result.seed_entities[0].entity_iid, 99);
    }

    #[test]
    fn test_seed_deduplicates_entities() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 10);
        // Two documents referencing the same issue
        insert_document(
            &conn,
            "issue",
            issue_id,
            project_id,
            "authentication error first doc",
        );
        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
        insert_document(
            &conn,
            "discussion",
            disc_id,
            project_id,
            "authentication error second doc",
        );

        let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();

        // Should deduplicate: both map to the same issue
        assert_eq!(result.seed_entities.len(), 1);
        assert_eq!(result.seed_entities[0].entity_iid, 10);
    }

    #[test]
    fn test_seed_resolves_discussion_to_parent() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 7);
        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
        insert_document(
            &conn,
            "discussion",
            disc_id,
            project_id,
            "deployment pipeline failed",
        );

        let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();

        assert_eq!(result.seed_entities.len(), 1);
        assert_eq!(result.seed_entities[0].entity_type, "issue");
        assert_eq!(result.seed_entities[0].entity_iid, 7);
    }

    #[test]
    fn test_seed_evidence_capped() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 1);
        // Create 15 discussion documents with notes about "deployment"
        for i in 0..15 {
            let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
            insert_document(
                &conn,
                "discussion",
                disc_id,
                project_id,
                &format!("deployment issue number {i}"),
            );
            insert_note(
                &conn,
                disc_id,
                project_id,
                &format!("deployment note {i}"),
                false,
            );
        }

        let result = seed_timeline(&conn, "deployment", None, None, 50, 5).unwrap();

        // 15 candidates and a cap of 5: exactly the cap must come back, so an
        // accidentally empty result cannot pass.
        assert_eq!(result.evidence_notes.len(), 5);
    }

    #[test]
    fn test_seed_evidence_snippet_truncated() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 1);
        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
        insert_document(
            &conn,
            "discussion",
            disc_id,
            project_id,
            "deployment configuration",
        );
        let long_body = "x".repeat(500);
        insert_note(&conn, disc_id, project_id, &long_body, false);

        let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();

        assert!(!result.evidence_notes.is_empty());
        if let TimelineEventType::NoteEvidence { snippet, .. } =
            &result.evidence_notes[0].event_type
        {
            // A 500-character body must be cut to exactly the 200-character limit.
            assert_eq!(snippet.chars().count(), 200);
        } else {
            panic!("Expected NoteEvidence");
        }
    }

    #[test]
    fn test_seed_evidence_skips_system_notes() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        let issue_id = insert_test_issue(&conn, project_id, 1);
        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
        insert_document(
            &conn,
            "discussion",
            disc_id,
            project_id,
            "deployment rollback",
        );
        insert_note(&conn, disc_id, project_id, "changed the description", true);

        let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();

        // The discussion still seeds its parent issue, but a system note is not evidence.
        assert_eq!(result.seed_entities.len(), 1);
        assert!(result.evidence_notes.is_empty());
    }

    #[test]
    fn test_seed_respects_project_filter() {
        let conn = setup_test_db();
        let project_id = insert_test_project(&conn);
        // Insert a second project
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (2, 'other/repo', 'https://gitlab.com/other/repo')",
            [],
        )
        .unwrap();
        let project2_id = conn.last_insert_rowid();
        let issue1_id = insert_test_issue(&conn, project_id, 1);
        insert_document(
            &conn,
            "issue",
            issue1_id,
            project_id,
            "authentication error",
        );
        let issue2_id = insert_test_issue(&conn, project2_id, 2);
        insert_document(
            &conn,
            "issue",
            issue2_id,
            project2_id,
            "authentication error",
        );

        // Filter to project 1 only
        let result =
            seed_timeline(&conn, "authentication", Some(project_id), None, 50, 10).unwrap();

        assert_eq!(result.seed_entities.len(), 1);
        assert_eq!(result.seed_entities[0].project_path, "group/project");
    }

    #[test]
    fn test_truncate_to_chars_short() {
        assert_eq!(truncate_to_chars("hello", 200), "hello");
    }

    #[test]
    fn test_truncate_to_chars_long() {
        let long = "a".repeat(300);
        let result = truncate_to_chars(&long, 200);
        assert_eq!(result.chars().count(), 200);
    }

    #[test]
    fn test_truncate_to_chars_multibyte() {
        let s = "\u{1F600}".repeat(300); // emoji
        let result = truncate_to_chars(&s, 200);
        assert_eq!(result.chars().count(), 200);
        // Verify valid UTF-8
        assert!(std::str::from_utf8(result.as_bytes()).is_ok());
    }
}