use std::collections::{HashSet, VecDeque}; use rusqlite::Connection; use crate::core::error::Result; use crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef}; /// Result of the expand phase. pub struct ExpandResult { pub expanded_entities: Vec, pub unresolved_references: Vec, } /// Run the EXPAND phase of the timeline pipeline (BFS over entity_references). /// /// Starting from seed entities, traverses cross-references (both outgoing and incoming) /// to discover related entities. Collects provenance (who referenced whom, how). pub fn expand_timeline( conn: &Connection, seeds: &[EntityRef], depth: u32, include_mentions: bool, max_entities: usize, ) -> Result { if depth == 0 || seeds.is_empty() { return Ok(ExpandResult { expanded_entities: Vec::new(), unresolved_references: Vec::new(), }); } let edge_types = if include_mentions { vec!["closes", "related", "mentioned"] } else { vec!["closes", "related"] }; let mut visited: HashSet<(String, i64)> = seeds .iter() .map(|s| (s.entity_type.clone(), s.entity_id)) .collect(); let mut queue: VecDeque<(EntityRef, u32)> = seeds.iter().map(|s| (s.clone(), 0)).collect(); let mut expanded = Vec::new(); let mut unresolved = Vec::new(); while let Some((current, current_depth)) = queue.pop_front() { if expanded.len() >= max_entities { break; } let neighbors = find_neighbors(conn, ¤t, &edge_types)?; for neighbor in neighbors { match neighbor { Neighbor::Resolved { entity_ref, reference_type, source_method, } => { let key = (entity_ref.entity_type.clone(), entity_ref.entity_id); if !visited.insert(key) { continue; } expanded.push(ExpandedEntityRef { entity_ref: entity_ref.clone(), depth: current_depth + 1, via_from: current.clone(), via_reference_type: reference_type, via_source_method: source_method, }); if expanded.len() >= max_entities { break; } if current_depth + 1 < depth { queue.push_back((entity_ref, current_depth + 1)); } } Neighbor::Unresolved(unresolved_ref) => { unresolved.push(unresolved_ref); } } } } Ok(ExpandResult { expanded_entities: expanded, unresolved_references: unresolved, }) } enum Neighbor { Resolved { entity_ref: EntityRef, reference_type: String, source_method: String, }, Unresolved(UnresolvedRef), } /// Find all neighbors (outgoing + incoming) for an entity in entity_references. fn find_neighbors( conn: &Connection, entity: &EntityRef, edge_types: &[&str], ) -> Result> { let mut neighbors = Vec::new(); find_outgoing(conn, entity, edge_types, &mut neighbors)?; find_incoming(conn, entity, edge_types, &mut neighbors)?; Ok(neighbors) } /// Find outgoing references: current entity is the source. fn find_outgoing( conn: &Connection, entity: &EntityRef, edge_types: &[&str], neighbors: &mut Vec, ) -> Result<()> { let placeholders: String = edge_types .iter() .enumerate() .map(|(i, _)| format!("?{}", i + 3)) .collect::>() .join(", "); let sql = format!( "SELECT target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method FROM entity_references WHERE source_entity_type = ?1 AND source_entity_id = ?2 AND reference_type IN ({placeholders})" ); let mut params: Vec> = vec![ Box::new(entity.entity_type.clone()), Box::new(entity.entity_id), ]; for et in edge_types { params.push(Box::new(et.to_string())); } let params_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; let rows = stmt.query_map(params_refs.as_slice(), |row| { Ok(( row.get::<_, String>(0)?, // target_entity_type row.get::<_, Option>(1)?, // target_entity_id row.get::<_, Option>(2)?, // target_project_path row.get::<_, Option>(3)?, // target_entity_iid row.get::<_, String>(4)?, // reference_type row.get::<_, String>(5)?, // source_method )) })?; for row_result in rows { let (target_type, target_id, target_project_path, target_iid, ref_type, source_method) = row_result?; match target_id { Some(tid) => { if let Some(resolved) = resolve_entity_ref(conn, &target_type, tid)? { neighbors.push(Neighbor::Resolved { entity_ref: resolved, reference_type: ref_type, source_method, }); } } None => { neighbors.push(Neighbor::Unresolved(UnresolvedRef { source: entity.clone(), target_project: target_project_path, target_type, target_iid: target_iid.unwrap_or(0), reference_type: ref_type, })); } } } Ok(()) } /// Find incoming references: current entity is the target. fn find_incoming( conn: &Connection, entity: &EntityRef, edge_types: &[&str], neighbors: &mut Vec, ) -> Result<()> { let placeholders: String = edge_types .iter() .enumerate() .map(|(i, _)| format!("?{}", i + 3)) .collect::>() .join(", "); let sql = format!( "SELECT source_entity_type, source_entity_id, reference_type, source_method FROM entity_references WHERE target_entity_type = ?1 AND target_entity_id = ?2 AND reference_type IN ({placeholders})" ); let mut params: Vec> = vec![ Box::new(entity.entity_type.clone()), Box::new(entity.entity_id), ]; for et in edge_types { params.push(Box::new(et.to_string())); } let params_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let mut stmt = conn.prepare(&sql)?; let rows = stmt.query_map(params_refs.as_slice(), |row| { Ok(( row.get::<_, String>(0)?, // source_entity_type row.get::<_, i64>(1)?, // source_entity_id row.get::<_, String>(2)?, // reference_type row.get::<_, String>(3)?, // source_method )) })?; for row_result in rows { let (source_type, source_id, ref_type, source_method) = row_result?; if let Some(resolved) = resolve_entity_ref(conn, &source_type, source_id)? { neighbors.push(Neighbor::Resolved { entity_ref: resolved, reference_type: ref_type, source_method, }); } } Ok(()) } /// Resolve an entity ID to a full EntityRef with iid and project_path. fn resolve_entity_ref( conn: &Connection, entity_type: &str, entity_id: i64, ) -> Result> { let table = match entity_type { "issue" => "issues", "merge_request" => "merge_requests", _ => return Ok(None), }; let sql = format!( "SELECT e.iid, p.path_with_namespace FROM {table} e JOIN projects p ON p.id = e.project_id WHERE e.id = ?1" ); let result = conn.query_row(&sql, rusqlite::params![entity_id], |row| { Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)) }); match result { Ok((iid, project_path)) => Ok(Some(EntityRef { entity_type: entity_type.to_owned(), entity_id, entity_iid: iid, project_path, })), Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), Err(e) => Err(e.into()), } } #[cfg(test)] mod tests { use super::*; use crate::core::db::{create_connection, run_migrations}; use std::path::Path; fn setup_test_db() -> Connection { let conn = create_connection(Path::new(":memory:")).unwrap(); run_migrations(&conn).unwrap(); conn } fn insert_project(conn: &Connection) -> i64 { conn.execute( "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')", [], ) .unwrap(); conn.last_insert_rowid() } fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 { conn.execute( "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test', 'opened', 'alice', 1000, 2000, 3000)", rusqlite::params![iid * 100, project_id, iid], ) .unwrap(); conn.last_insert_rowid() } fn insert_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 { conn.execute( "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)", rusqlite::params![iid * 100, project_id, iid], ) .unwrap(); conn.last_insert_rowid() } #[allow(clippy::too_many_arguments)] fn insert_ref( conn: &Connection, project_id: i64, source_type: &str, source_id: i64, target_type: &str, target_id: Option, ref_type: &str, source_method: &str, ) { conn.execute( "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1000)", rusqlite::params![project_id, source_type, source_id, target_type, target_id, ref_type, source_method], ) .unwrap(); } fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef { EntityRef { entity_type: entity_type.to_owned(), entity_id, entity_iid: iid, project_path: "group/project".to_owned(), } } #[test] fn test_expand_depth_zero() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 0, false, 100).unwrap(); assert!(result.expanded_entities.is_empty()); assert!(result.unresolved_references.is_empty()); } #[test] fn test_expand_finds_linked_entity() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let mr_id = insert_mr(&conn, project_id, 10); // MR closes issue insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "closes", "api", ); let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap(); assert_eq!(result.expanded_entities.len(), 1); assert_eq!( result.expanded_entities[0].entity_ref.entity_type, "merge_request" ); assert_eq!(result.expanded_entities[0].entity_ref.entity_iid, 10); assert_eq!(result.expanded_entities[0].depth, 1); } #[test] fn test_expand_bidirectional() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let mr_id = insert_mr(&conn, project_id, 10); // MR closes issue (MR is source, issue is target) insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "closes", "api", ); // Starting from MR should find the issue (outgoing) let seeds = vec![make_entity_ref("merge_request", mr_id, 10)]; let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap(); assert_eq!(result.expanded_entities.len(), 1); assert_eq!(result.expanded_entities[0].entity_ref.entity_type, "issue"); } #[test] fn test_expand_respects_max_entities() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); // Create 10 MRs that all close this issue for i in 2..=11 { let mr_id = insert_mr(&conn, project_id, i); insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "closes", "api", ); } let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 1, false, 3).unwrap(); assert!(result.expanded_entities.len() <= 3); } #[test] fn test_expand_skips_mentions_by_default() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let mr_id = insert_mr(&conn, project_id, 10); // MR mentions issue (should be skipped by default) insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "mentioned", "note_parse", ); let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap(); assert!(result.expanded_entities.is_empty()); } #[test] fn test_expand_includes_mentions_when_flagged() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let mr_id = insert_mr(&conn, project_id, 10); // MR mentions issue insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "mentioned", "note_parse", ); let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 1, true, 100).unwrap(); assert_eq!(result.expanded_entities.len(), 1); } #[test] fn test_expand_collects_unresolved() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); // Unresolved cross-project reference conn.execute( "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at) VALUES (?1, 'issue', ?2, 'issue', NULL, 'other/repo', 42, 'closes', 'description_parse', 1000)", rusqlite::params![project_id, issue_id], ) .unwrap(); let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap(); assert!(result.expanded_entities.is_empty()); assert_eq!(result.unresolved_references.len(), 1); assert_eq!( result.unresolved_references[0].target_project, Some("other/repo".to_owned()) ); assert_eq!(result.unresolved_references[0].target_iid, 42); } #[test] fn test_expand_tracks_provenance() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let mr_id = insert_mr(&conn, project_id, 10); insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "closes", "api", ); let seeds = vec![make_entity_ref("issue", issue_id, 1)]; let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap(); assert_eq!(result.expanded_entities.len(), 1); let expanded = &result.expanded_entities[0]; assert_eq!(expanded.via_reference_type, "closes"); assert_eq!(expanded.via_source_method, "api"); assert_eq!(expanded.via_from.entity_type, "issue"); assert_eq!(expanded.via_from.entity_id, issue_id); } #[test] fn test_expand_no_duplicates() { let conn = setup_test_db(); let project_id = insert_project(&conn); let issue_id = insert_issue(&conn, project_id, 1); let mr_id = insert_mr(&conn, project_id, 10); // Two references from MR to same issue (different methods) insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "closes", "api", ); insert_ref( &conn, project_id, "merge_request", mr_id, "issue", Some(issue_id), "related", "note_parse", ); let seeds = vec![make_entity_ref("merge_request", mr_id, 10)]; let result = expand_timeline(&conn, &seeds, 1, false, 100).unwrap(); // Should only appear once (first-come wins) assert_eq!(result.expanded_entities.len(), 1); } #[test] fn test_expand_empty_seeds() { let conn = setup_test_db(); let result = expand_timeline(&conn, &[], 1, false, 100).unwrap(); assert!(result.expanded_entities.is_empty()); } }