Files
gitlore/src/core/timeline_expand.rs
Taylor Eernisse 3767c33c28 feat: Implement Gate 3 timeline pipeline and Gate 4 migration scaffolding
Complete 5 beads for the Phase B temporal intelligence feature:

- bd-1oo: Register migration 015 (commit SHAs, closes watermark) and
  create migration 016 (mr_file_changes table with 4 indexes for
  Gate 4 file-history)

- bd-20e: Define TimelineEvent model with 9 event type variants,
  EntityRef, ExpandedEntityRef, UnresolvedRef, and TimelineResult
  types. Ord impl for chronological sorting with stable tiebreak.

- bd-32q: Implement timeline seed phase - FTS5 keyword search to
  entity IDs with discussion-to-parent resolution, entity dedup,
  and evidence note extraction with snippet truncation.

- bd-ypa: Implement timeline expand phase - BFS cross-reference
  expansion over entity_references with bidirectional traversal,
  depth limiting, mention filtering, provenance tracking, and
  unresolved reference collection.

- bd-3as: Implement timeline event collection - gathers Created,
  StateChanged, LabelAdded/Removed, MilestoneSet/Removed, Merged,
  and NoteEvidence events. Merged dedup (state=merged -> Merged
  variant only). NULL label/milestone fallbacks. Chronological
  interleaving with since filter and limit.

38 new tests, all 445 tests pass. All quality gates clean.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 16:54:28 -05:00

593 lines
18 KiB
Rust

use std::collections::{HashSet, VecDeque};
use rusqlite::Connection;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef};
/// Result of the expand phase.
///
/// Produced by [`expand_timeline`]; both vectors are empty when expansion is
/// skipped (depth `0` or no seeds).
pub struct ExpandResult {
    /// Entities reached from the seeds, in the order the traversal first
    /// encountered them. Seed entities themselves are not included.
    pub expanded_entities: Vec<ExpandedEntityRef>,
    /// Outgoing references whose `target_entity_id` was NULL in
    /// `entity_references`, i.e. targets with no local entity to traverse to.
    pub unresolved_references: Vec<UnresolvedRef>,
}
/// Run the EXPAND phase of the timeline pipeline (BFS over `entity_references`).
///
/// Starting from the seed entities, traverses cross-references in both
/// directions (outgoing and incoming) to discover related entities, recording
/// for each one the entity it was reached from and the reference that led to it.
///
/// # Arguments
///
/// * `conn` - database connection holding the `entity_references` table.
/// * `seeds` - starting entities; they are never reported as expanded.
///   Repeated seeds are traversed only once.
/// * `depth` - maximum number of hops from a seed. `0` disables expansion.
/// * `include_mentions` - when `true`, `"mentioned"` edges are followed in
///   addition to `"closes"` and `"related"`.
/// * `max_entities` - upper bound on the number of expanded entities returned.
///
/// # Errors
///
/// Propagates any error returned by the underlying neighbor queries.
pub fn expand_timeline(
    conn: &Connection,
    seeds: &[EntityRef],
    depth: u32,
    include_mentions: bool,
    max_entities: usize,
) -> Result<ExpandResult> {
    if depth == 0 || seeds.is_empty() {
        return Ok(ExpandResult {
            expanded_entities: Vec::new(),
            unresolved_references: Vec::new(),
        });
    }

    let edge_types: &[&str] = if include_mentions {
        &["closes", "related", "mentioned"]
    } else {
        &["closes", "related"]
    };

    // Enqueue each distinct seed exactly once. Enqueuing a repeated seed would
    // re-run its queries and report its unresolved references a second time.
    let mut visited: HashSet<(String, i64)> = HashSet::with_capacity(seeds.len());
    let mut queue: VecDeque<(EntityRef, u32)> = VecDeque::with_capacity(seeds.len());
    for seed in seeds {
        if visited.insert((seed.entity_type.clone(), seed.entity_id)) {
            queue.push_back((seed.clone(), 0));
        }
    }

    let mut expanded = Vec::new();
    let mut unresolved = Vec::new();

    while let Some((current, current_depth)) = queue.pop_front() {
        // Also catches the case where the inner loop stopped on the cap.
        if expanded.len() >= max_entities {
            break;
        }

        let next_depth = current_depth + 1;
        for neighbor in find_neighbors(conn, &current, edge_types)? {
            match neighbor {
                Neighbor::Resolved {
                    entity_ref,
                    reference_type,
                    source_method,
                } => {
                    // First path to reach an entity wins; later ones are ignored.
                    let key = (entity_ref.entity_type.clone(), entity_ref.entity_id);
                    if !visited.insert(key) {
                        continue;
                    }

                    expanded.push(ExpandedEntityRef {
                        entity_ref: entity_ref.clone(),
                        depth: next_depth,
                        via_from: current.clone(),
                        via_reference_type: reference_type,
                        via_source_method: source_method,
                    });

                    if expanded.len() >= max_entities {
                        break;
                    }

                    // Entities at the depth limit are reported but not explored.
                    if next_depth < depth {
                        queue.push_back((entity_ref, next_depth));
                    }
                }
                Neighbor::Unresolved(unresolved_ref) => {
                    unresolved.push(unresolved_ref);
                }
            }
        }
    }

    Ok(ExpandResult {
        expanded_entities: expanded,
        unresolved_references: unresolved,
    })
}
/// A single edge endpoint discovered while scanning `entity_references`.
enum Neighbor {
    /// The other end of the reference exists locally and can be traversed.
    Resolved {
        /// The neighboring entity, with its iid and project path filled in.
        entity_ref: EntityRef,
        /// Value of the row's `reference_type` column.
        reference_type: String,
        /// Value of the row's `source_method` column.
        source_method: String,
    },
    /// An outgoing reference whose `target_entity_id` was NULL.
    Unresolved(UnresolvedRef),
}
/// Collect every neighbor of `entity` in `entity_references`, restricted to
/// the given `edge_types`.
///
/// Outgoing references (where `entity` is the source) are listed first,
/// followed by incoming references (where `entity` is the target).
fn find_neighbors(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
) -> Result<Vec<Neighbor>> {
    let mut collected: Vec<Neighbor> = Vec::new();

    find_outgoing(conn, entity, edge_types, &mut collected)?;
    find_incoming(conn, entity, edge_types, &mut collected)?;

    Ok(collected)
}
/// Find outgoing references: `entity` is the source.
///
/// Appends one [`Neighbor`] to `neighbors` per matching row:
///
/// * rows with a non-NULL `target_entity_id` are resolved through
///   `resolve_entity_ref`; rows that do not resolve are skipped;
/// * rows with a NULL `target_entity_id` become [`Neighbor::Unresolved`],
///   with a missing `target_entity_iid` recorded as `0`.
///
/// # Errors
///
/// Returns any error raised while preparing, executing, or decoding the query,
/// or while resolving a target entity.
fn find_outgoing(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
    neighbors: &mut Vec<Neighbor>,
) -> Result<()> {
    // ?1 and ?2 are the source type and id; edge types occupy ?3 onwards.
    let placeholders = (3..edge_types.len() + 3)
        .map(|n| format!("?{n}"))
        .collect::<Vec<_>>()
        .join(", ");

    let sql = format!(
        "SELECT target_entity_type, target_entity_id, target_project_path, target_entity_iid,\n\
         reference_type, source_method\n\
         FROM entity_references\n\
         WHERE source_entity_type = ?1\n\
         AND source_entity_id = ?2\n\
         AND reference_type IN ({placeholders})"
    );

    // Borrow the values directly: `String`, `i64` and `&str` all implement
    // `ToSql`, so no boxing or string cloning is needed.
    let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
    params.push(&entity.entity_type);
    params.push(&entity.entity_id);
    for edge_type in edge_types {
        params.push(edge_type);
    }

    // The SQL text is identical for every entity visited with the same edge
    // set, so let the connection reuse the compiled statement.
    let mut stmt = conn.prepare_cached(&sql)?;
    let rows = stmt.query_map(params.as_slice(), |row| {
        Ok((
            row.get::<_, String>(0)?,         // target_entity_type
            row.get::<_, Option<i64>>(1)?,    // target_entity_id
            row.get::<_, Option<String>>(2)?, // target_project_path
            row.get::<_, Option<i64>>(3)?,    // target_entity_iid
            row.get::<_, String>(4)?,         // reference_type
            row.get::<_, String>(5)?,         // source_method
        ))
    })?;

    for row in rows {
        let (target_type, target_id, target_project_path, target_iid, ref_type, source_method) =
            row?;

        match target_id {
            Some(tid) => {
                if let Some(resolved) = resolve_entity_ref(conn, &target_type, tid)? {
                    neighbors.push(Neighbor::Resolved {
                        entity_ref: resolved,
                        reference_type: ref_type,
                        source_method,
                    });
                }
            }
            None => {
                neighbors.push(Neighbor::Unresolved(UnresolvedRef {
                    source: entity.clone(),
                    target_project: target_project_path,
                    target_type,
                    target_iid: target_iid.unwrap_or(0),
                    reference_type: ref_type,
                }));
            }
        }
    }

    Ok(())
}
/// Find incoming references: `entity` is the target.
///
/// Appends a [`Neighbor::Resolved`] to `neighbors` for every matching row
/// whose source entity can be resolved through `resolve_entity_ref`; rows
/// whose source does not resolve are skipped.
///
/// # Errors
///
/// Returns any error raised while preparing, executing, or decoding the query,
/// or while resolving a source entity.
fn find_incoming(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
    neighbors: &mut Vec<Neighbor>,
) -> Result<()> {
    // ?1 and ?2 are the target type and id; edge types occupy ?3 onwards.
    let placeholders = (3..edge_types.len() + 3)
        .map(|n| format!("?{n}"))
        .collect::<Vec<_>>()
        .join(", ");

    let sql = format!(
        "SELECT source_entity_type, source_entity_id, reference_type, source_method\n\
         FROM entity_references\n\
         WHERE target_entity_type = ?1\n\
         AND target_entity_id = ?2\n\
         AND reference_type IN ({placeholders})"
    );

    // Borrow the values directly: `String`, `i64` and `&str` all implement
    // `ToSql`, so no boxing or string cloning is needed.
    let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
    params.push(&entity.entity_type);
    params.push(&entity.entity_id);
    for edge_type in edge_types {
        params.push(edge_type);
    }

    // The SQL text is identical for every entity visited with the same edge
    // set, so let the connection reuse the compiled statement.
    let mut stmt = conn.prepare_cached(&sql)?;
    let rows = stmt.query_map(params.as_slice(), |row| {
        Ok((
            row.get::<_, String>(0)?, // source_entity_type
            row.get::<_, i64>(1)?,    // source_entity_id
            row.get::<_, String>(2)?, // reference_type
            row.get::<_, String>(3)?, // source_method
        ))
    })?;

    for row in rows {
        let (source_type, source_id, ref_type, source_method) = row?;

        if let Some(resolved) = resolve_entity_ref(conn, &source_type, source_id)? {
            neighbors.push(Neighbor::Resolved {
                entity_ref: resolved,
                reference_type: ref_type,
                source_method,
            });
        }
    }

    Ok(())
}
/// Turn an `(entity_type, entity_id)` pair into a complete [`EntityRef`] by
/// looking up the entity's iid and its project's `path_with_namespace`.
///
/// Returns `Ok(None)` when `entity_type` is neither `"issue"` nor
/// `"merge_request"`, or when no row with that id exists. Any other database
/// error is propagated.
fn resolve_entity_ref(
    conn: &Connection,
    entity_type: &str,
    entity_id: i64,
) -> Result<Option<EntityRef>> {
    // Only these two entity kinds have a backing table to join against.
    let table = if entity_type == "issue" {
        "issues"
    } else if entity_type == "merge_request" {
        "merge_requests"
    } else {
        return Ok(None);
    };

    let sql = format!(
        "SELECT e.iid, p.path_with_namespace\n\
         FROM {table} e\n\
         JOIN projects p ON p.id = e.project_id\n\
         WHERE e.id = ?1"
    );

    let lookup = conn.query_row(&sql, rusqlite::params![entity_id], |row| {
        let iid: i64 = row.get(0)?;
        let project_path: String = row.get(1)?;
        Ok(EntityRef {
            entity_type: entity_type.to_owned(),
            entity_id,
            entity_iid: iid,
            project_path,
        })
    });

    match lookup {
        // A missing row is an expected outcome, not a failure.
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(other) => Err(other.into()),
        Ok(found) => Ok(Some(found)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::db::{create_connection, run_migrations};
    use std::path::Path;

    /// Open an in-memory database with all migrations applied.
    fn setup_test_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        conn
    }

    /// Insert the single test project (`group/project`) and return its row id.
    fn insert_project(conn: &Connection) -> i64 {
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')",
            [],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert an opened issue with the given iid and return its row id.
    fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
        conn.execute(
            "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test', 'opened', 'alice', 1000, 2000, 3000)",
            rusqlite::params![iid * 100, project_id, iid],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert an opened merge request with the given iid and return its row id.
    fn insert_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
        conn.execute(
            "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)",
            rusqlite::params![iid * 100, project_id, iid],
        )
        .unwrap();
        conn.last_insert_rowid()
    }

    /// Insert one row into `entity_references`.
    // Test-only helper that mirrors the table's columns one-to-one, so the
    // long parameter list is intentional.
    #[allow(clippy::too_many_arguments)]
    fn insert_ref(
        conn: &Connection,
        project_id: i64,
        source_type: &str,
        source_id: i64,
        target_type: &str,
        target_id: Option<i64>,
        ref_type: &str,
        source_method: &str,
    ) {
        conn.execute(
            "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1000)",
            rusqlite::params![project_id, source_type, source_id, target_type, target_id, ref_type, source_method],
        )
        .unwrap();
    }

    /// Build a seed `EntityRef` belonging to the test project.
    fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef {
        EntityRef {
            entity_type: entity_type.to_owned(),
            entity_id,
            entity_iid: iid,
            project_path: "group/project".to_owned(),
        }
    }

    #[test]
    fn test_expand_depth_zero() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 0, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
        assert!(result.unresolved_references.is_empty());
    }

    #[test]
    fn test_expand_finds_linked_entity() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);

        // MR closes issue
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert_eq!(result.expanded_entities.len(), 1);
        assert_eq!(
            result.expanded_entities[0].entity_ref.entity_type,
            "merge_request"
        );
        assert_eq!(result.expanded_entities[0].entity_ref.entity_iid, 10);
        assert_eq!(result.expanded_entities[0].depth, 1);
    }

    #[test]
    fn test_expand_bidirectional() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);

        // MR closes issue (MR is source, issue is target)
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );

        // Starting from MR should find the issue (outgoing)
        let seeds = vec![make_entity_ref("merge_request", mr_id, 10)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
        assert_eq!(result.expanded_entities.len(), 1);
        assert_eq!(result.expanded_entities[0].entity_ref.entity_type, "issue");
        assert_eq!(result.expanded_entities[0].entity_ref.entity_id, issue_id);

        // Starting from the issue should find the MR (incoming)
        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
        assert_eq!(result.expanded_entities.len(), 1);
        assert_eq!(
            result.expanded_entities[0].entity_ref.entity_type,
            "merge_request"
        );
        assert_eq!(result.expanded_entities[0].entity_ref.entity_id, mr_id);
    }

    #[test]
    fn test_expand_respects_max_entities() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);

        // Create 10 MRs that all close this issue
        for i in 2..=11 {
            let mr_id = insert_mr(&conn, project_id, i);
            insert_ref(
                &conn,
                project_id,
                "merge_request",
                mr_id,
                "issue",
                Some(issue_id),
                "closes",
                "api",
            );
        }

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 3).unwrap();

        // Ten candidates are available, so the cap must be hit exactly; a
        // `<=` check would also accept an empty result.
        assert_eq!(result.expanded_entities.len(), 3);
    }

    #[test]
    fn test_expand_respects_depth_limit() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let other_issue_id = insert_issue(&conn, project_id, 2);
        let mr_id = insert_mr(&conn, project_id, 10);

        // issue 1 <- MR -> issue 2: issue 2 is two hops away from issue 1
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(other_issue_id),
            "closes",
            "api",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];

        // Depth 1 stops at the MR
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();
        assert_eq!(result.expanded_entities.len(), 1);
        assert_eq!(result.expanded_entities[0].entity_ref.entity_id, mr_id);
        assert_eq!(result.expanded_entities[0].depth, 1);

        // Depth 2 continues through the MR to the second issue
        let result = expand_timeline(&conn, &seeds, 2, false, 100).unwrap();
        assert_eq!(result.expanded_entities.len(), 2);
        assert_eq!(
            result.expanded_entities[0].entity_ref.entity_type,
            "merge_request"
        );
        assert_eq!(result.expanded_entities[0].depth, 1);
        assert_eq!(result.expanded_entities[1].entity_ref.entity_type, "issue");
        assert_eq!(
            result.expanded_entities[1].entity_ref.entity_id,
            other_issue_id
        );
        assert_eq!(result.expanded_entities[1].depth, 2);
        assert_eq!(
            result.expanded_entities[1].via_from.entity_type,
            "merge_request"
        );
        assert_eq!(result.expanded_entities[1].via_from.entity_id, mr_id);
    }

    #[test]
    fn test_expand_skips_mentions_by_default() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);

        // MR mentions issue (should be skipped by default)
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "mentioned",
            "note_parse",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
    }

    #[test]
    fn test_expand_includes_mentions_when_flagged() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);

        // MR mentions issue
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "mentioned",
            "note_parse",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, true, 100).unwrap();

        assert_eq!(result.expanded_entities.len(), 1);
    }

    #[test]
    fn test_expand_collects_unresolved() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);

        // Unresolved cross-project reference
        conn.execute(
            "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at) VALUES (?1, 'issue', ?2, 'issue', NULL, 'other/repo', 42, 'closes', 'description_parse', 1000)",
            rusqlite::params![project_id, issue_id],
        )
        .unwrap();

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
        assert_eq!(result.unresolved_references.len(), 1);
        assert_eq!(
            result.unresolved_references[0].target_project,
            Some("other/repo".to_owned())
        );
        assert_eq!(result.unresolved_references[0].target_iid, 42);
    }

    #[test]
    fn test_expand_tracks_provenance() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);

        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );

        let seeds = vec![make_entity_ref("issue", issue_id, 1)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        assert_eq!(result.expanded_entities.len(), 1);
        let expanded = &result.expanded_entities[0];
        assert_eq!(expanded.via_reference_type, "closes");
        assert_eq!(expanded.via_source_method, "api");
        assert_eq!(expanded.via_from.entity_type, "issue");
        assert_eq!(expanded.via_from.entity_id, issue_id);
    }

    #[test]
    fn test_expand_no_duplicates() {
        let conn = setup_test_db();
        let project_id = insert_project(&conn);
        let issue_id = insert_issue(&conn, project_id, 1);
        let mr_id = insert_mr(&conn, project_id, 10);

        // Two references from MR to same issue (different methods)
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "related",
            "note_parse",
        );

        let seeds = vec![make_entity_ref("merge_request", mr_id, 10)];
        let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap();

        // Should only appear once (first-come wins)
        assert_eq!(result.expanded_entities.len(), 1);
    }

    #[test]
    fn test_expand_empty_seeds() {
        let conn = setup_test_db();

        let result = expand_timeline(&conn, &[], 1, false, 100).unwrap();

        assert!(result.expanded_entities.is_empty());
    }
}