From 48fbd4bfdb7878c3b4dbf5ba2b10f0ad1d613dad Mon Sep 17 00:00:00 2001
From: Taylor Eernisse
Date: Fri, 13 Feb 2026 10:54:41 -0500
Subject: [PATCH] feat(core): add file rename chain resolver with depth-bounded BFS

New module: core::file_history with resolve_rename_chain() that traces a
file path through its rename history in mr_file_changes using bidirectional
BFS (forward: old_path->new_path, backward: new_path->old_path).

Key design decisions:
- Depth-bounded BFS: each queue entry carries its distance from the origin,
  so max_hops correctly limits by graph distance (not by total nodes
  discovered). This matters for branching rename graphs where a file was
  renamed differently in parallel MRs.
- Cycle-safe: visited set prevents infinite loops from circular renames.
- Project-scoped: queries are always scoped to a single project_id.
- Deterministic: output is sorted for stable results.

Tests cover: linear chains (forward/backward), cycles, max_hops=0,
depth-bounded linear chains, branching renames, diamond patterns, and
cross-project isolation (9 tests total).

Co-Authored-By: Claude Opus 4.6
---
 src/core/file_history.rs       |  75 +++++++++
 src/core/file_history_tests.rs | 274 +++++++++++++++++++++++++++++++++
 src/core/mod.rs                |   1 +
 3 files changed, 350 insertions(+)
 create mode 100644 src/core/file_history.rs
 create mode 100644 src/core/file_history_tests.rs

diff --git a/src/core/file_history.rs b/src/core/file_history.rs
new file mode 100644
index 0000000..fbc81d4
--- /dev/null
+++ b/src/core/file_history.rs
@@ -0,0 +1,75 @@
+use std::collections::HashSet;
+use std::collections::VecDeque;
+
+use rusqlite::Connection;
+
+use super::error::Result;
+
+/// Resolves a file path through its rename history in `mr_file_changes`.
+///
+/// BFS in both directions: forward (`old_path` -> `new_path`) and backward
+/// (`new_path` -> `old_path`). Returns all equivalent paths including the
+/// original, sorted for determinism. Cycles are detected via a visited set.
+///
+/// `max_hops` limits the BFS depth (distance from the starting path).
+///
+/// # Errors
+///
+/// Returns an error if a query cannot be prepared or a result row cannot be read.
+pub fn resolve_rename_chain(
+    conn: &Connection,
+    project_id: i64,
+    path: &str,
+    max_hops: usize,
+) -> Result<Vec<String>> {
+    let mut visited: HashSet<String> = HashSet::new();
+    visited.insert(path.to_string());
+
+    if max_hops == 0 {
+        return Ok(vec![path.to_string()]);
+    }
+
+    let mut queue: VecDeque<(String, usize)> = VecDeque::new();
+    queue.push_back((path.to_string(), 0));
+
+    let forward_sql = "\
+        SELECT DISTINCT mfc.new_path FROM mr_file_changes mfc \
+        WHERE mfc.project_id = ?1 AND mfc.old_path = ?2 AND mfc.change_type = 'renamed'";
+    let backward_sql = "\
+        SELECT DISTINCT mfc.old_path FROM mr_file_changes mfc \
+        WHERE mfc.project_id = ?1 AND mfc.new_path = ?2 AND mfc.change_type = 'renamed'";
+
+    while let Some((current, depth)) = queue.pop_front() {
+        if depth >= max_hops {
+            continue;
+        }
+
+        // Forward: current was the old name -> discover new names
+        let mut fwd_stmt = conn.prepare_cached(forward_sql)?;
+        let forward: Vec<String> = fwd_stmt
+            .query_map(rusqlite::params![project_id, &current], |row| row.get(0))?
+            // Propagate row errors instead of silently dropping them.
+            .collect::<rusqlite::Result<Vec<String>>>()?;
+
+        // Backward: current was the new name -> discover old names
+        let mut bwd_stmt = conn.prepare_cached(backward_sql)?;
+        let backward: Vec<String> = bwd_stmt
+            .query_map(rusqlite::params![project_id, &current], |row| row.get(0))?
+            // Propagate row errors instead of silently dropping them.
+            .collect::<rusqlite::Result<Vec<String>>>()?;
+
+        for discovered in forward.into_iter().chain(backward) {
+            if visited.insert(discovered.clone()) {
+                queue.push_back((discovered, depth + 1));
+            }
+        }
+    }
+
+    let mut paths: Vec<String> = visited.into_iter().collect();
+    paths.sort();
+    Ok(paths)
+}
+
+#[cfg(test)]
+#[path = "file_history_tests.rs"]
+mod tests;
diff --git a/src/core/file_history_tests.rs b/src/core/file_history_tests.rs
new file mode 100644
index 0000000..8273922
--- /dev/null
+++ b/src/core/file_history_tests.rs
@@ -0,0 +1,274 @@
+use super::*;
+use crate::core::db::{create_connection, run_migrations};
+use std::path::Path;
+
+fn setup_test_db() -> Connection {
+    let conn = create_connection(Path::new(":memory:")).unwrap();
+    run_migrations(&conn).unwrap();
+    conn
+}
+
+fn seed_project(conn: &Connection) -> i64 {
+    conn.execute(
+        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
+         VALUES (1, 100, 'group/repo', 'https://gitlab.example.com/group/repo', 1000, 2000)",
+        [],
+    )
+    .unwrap();
+
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (1, 300, 5, 1, 'Rename MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
+        [],
+    )
+    .unwrap();
+
+    1 // project_id
+}
+
+fn insert_rename(conn: &Connection, mr_id: i64, old_path: &str, new_path: &str) {
+    conn.execute(
+        "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type)
+         VALUES (?1, 1, ?2, ?3, 'renamed')",
+        rusqlite::params![mr_id, old_path, new_path],
+    )
+    .unwrap();
+}
+
+#[test]
+fn test_no_renames_returns_original_path() {
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    let result = resolve_rename_chain(&conn, project_id, "src/auth.rs", 10).unwrap();
+    assert_eq!(result, ["src/auth.rs"]);
+}
+
+#[test]
+fn test_forward_chain() {
+    // a.rs -> b.rs -> c.rs, starting from a.rs finds all three
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    // Need a second MR for the next rename
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 2, "src/b.rs", "src/c.rs");
+
+    let mut result = resolve_rename_chain(&conn, project_id, "src/a.rs", 10).unwrap();
+    result.sort();
+    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
+}
+
+#[test]
+fn test_backward_chain() {
+    // a.rs -> b.rs -> c.rs, starting from c.rs finds all three
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 2, "src/b.rs", "src/c.rs");
+
+    let mut result = resolve_rename_chain(&conn, project_id, "src/c.rs", 10).unwrap();
+    result.sort();
+    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
+}
+
+#[test]
+fn test_cycle_detection() {
+    // a -> b -> a: terminates without infinite loop
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (2, 301, 6, 1, 'Rename back', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 2, "src/b.rs", "src/a.rs");
+
+    let mut result = resolve_rename_chain(&conn, project_id, "src/a.rs", 10).unwrap();
+    result.sort();
+    assert_eq!(result, ["src/a.rs", "src/b.rs"]);
+}
+
+#[test]
+fn test_max_hops_zero_returns_original() {
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 0).unwrap();
+    assert_eq!(result, ["src/a.rs"]);
+}
+
+#[test]
+fn test_max_hops_bounded() {
+    // Chain: a -> b -> c -> d -> e (4 hops)
+    // With max_hops=2, should find exactly {a, b, c} (original + 2 depth levels)
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    let paths = ["src/a.rs", "src/b.rs", "src/c.rs", "src/d.rs", "src/e.rs"];
+    for (i, window) in paths.windows(2).enumerate() {
+        if i > 0 {
+            conn.execute(
+                "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+                 created_at, updated_at, last_seen_at, source_branch, target_branch)
+                 VALUES (?1, ?2, ?3, 1, 'MR', 'merged', ?4, ?5, ?5, 'feat', 'main')",
+                rusqlite::params![
+                    (i + 1) as i64,
+                    (300 + i) as i64,
+                    (5 + i) as i64,
+                    (1000 * (i + 1)) as i64,
+                    (2000 * (i + 1)) as i64,
+                ],
+            )
+            .unwrap();
+        }
+        #[allow(clippy::cast_possible_wrap)]
+        insert_rename(&conn, (i + 1) as i64, window[0], window[1]);
+    }
+
+    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 2).unwrap();
+    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
+
+    // Depth 1 should find only {a, b}
+    let result1 = resolve_rename_chain(&conn, project_id, "src/a.rs", 1).unwrap();
+    assert_eq!(result1, ["src/a.rs", "src/b.rs"]);
+}
+
+#[test]
+fn test_diamond_pattern() {
+    // Diamond: a -> b, a -> c, b -> d, c -> d
+    // From a with max_hops=2, should find all four: {a, b, c, d}
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    // MR 1: a -> b
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    // MR 2: a -> c
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (2, 301, 6, 1, 'MR 2', 'merged', 2000, 3000, 3000, 'feat2', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 2, "src/a.rs", "src/c.rs");
+
+    // MR 3: b -> d
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (3, 302, 7, 1, 'MR 3', 'merged', 3000, 4000, 4000, 'feat3', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 3, "src/b.rs", "src/d.rs");
+
+    // MR 4: c -> d
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (4, 303, 8, 1, 'MR 4', 'merged', 4000, 5000, 5000, 'feat4', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 4, "src/c.rs", "src/d.rs");
+
+    // max_hops=2: a(0) -> {b,c}(1) -> {d}(2) — all four found
+    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 2).unwrap();
+    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs", "src/d.rs"]);
+
+    // max_hops=1: a(0) -> {b,c}(1) — d at depth 2 excluded
+    let result1 = resolve_rename_chain(&conn, project_id, "src/a.rs", 1).unwrap();
+    assert_eq!(result1, ["src/a.rs", "src/b.rs", "src/c.rs"]);
+}
+
+#[test]
+fn test_branching_renames() {
+    // a.rs was renamed to b.rs in one MR and c.rs in another
+    let conn = setup_test_db();
+    let project_id = seed_project(&conn);
+
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
+        [],
+    )
+    .unwrap();
+    insert_rename(&conn, 2, "src/a.rs", "src/c.rs");
+
+    let mut result = resolve_rename_chain(&conn, project_id, "src/a.rs", 10).unwrap();
+    result.sort();
+    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
+}
+
+#[test]
+fn test_different_project_isolation() {
+    // Renames in project 2 should not leak into project 1 queries
+    let conn = setup_test_db();
+    let _project_id = seed_project(&conn);
+
+    // Create project 2
+    conn.execute(
+        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
+         VALUES (2, 200, 'other/repo', 'https://gitlab.example.com/other/repo', 1000, 2000)",
+        [],
+    )
+    .unwrap();
+    conn.execute(
+        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
+         created_at, updated_at, last_seen_at, source_branch, target_branch)
+         VALUES (2, 301, 5, 2, 'Other MR', 'merged', 1000, 2000, 2000, 'feat', 'main')",
+        [],
+    )
+    .unwrap();
+
+    // Rename in project 1
+    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
+
+    // Rename in project 2 (different mr_id and project_id)
+    conn.execute(
+        "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type)
+         VALUES (2, 2, 'src/a.rs', 'src/z.rs', 'renamed')",
+        [],
+    )
+    .unwrap();
+
+    // Query project 1 -- should NOT see z.rs
+    let mut result = resolve_rename_chain(&conn, 1, "src/a.rs", 10).unwrap();
+    result.sort();
+    assert_eq!(result, ["src/a.rs", "src/b.rs"]);
+
+    // Query project 2 -- should NOT see b.rs
+    let mut result2 = resolve_rename_chain(&conn, 2, "src/a.rs", 10).unwrap();
+    result2.sort();
+    assert_eq!(result2, ["src/a.rs", "src/z.rs"]);
+}
diff --git a/src/core/mod.rs b/src/core/mod.rs
index 0bf1534..30a288a 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -4,6 +4,7 @@ pub mod db;
 pub mod dependent_queue;
 pub mod error;
 pub mod events_db;
+pub mod file_history;
 pub mod lock;
 pub mod logging;
 pub mod metrics;