feat(core): add file rename chain resolver with depth-bounded BFS

New module: core::file_history with resolve_rename_chain() that traces
a file path through its rename history in mr_file_changes using a BFS
that follows rename edges in both directions (forward: old_path->new_path,
backward: new_path->old_path).

Key design decisions:
- Depth-bounded BFS: each queue entry carries its distance from the
  origin, so max_hops correctly limits by graph distance (not by total
  nodes discovered). This matters for branching rename graphs where a
  file was renamed differently in parallel MRs.
- Cycle-safe: visited set prevents infinite loops from circular renames.
- Project-scoped: queries are always scoped to a single project_id.
- Deterministic: output is sorted for stable results.

Tests cover: linear chains (forward/backward), cycles, max_hops=0,
depth-bounded linear chains, branching renames, diamond patterns,
and cross-project isolation (9 tests total).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-13 10:54:41 -05:00
parent 9786ef27f5
commit 48fbd4bfdb
3 changed files with 346 additions and 0 deletions

71
src/core/file_history.rs Normal file
View File

@@ -0,0 +1,71 @@
use std::collections::HashSet;
use std::collections::VecDeque;
use rusqlite::Connection;
use super::error::Result;
/// Resolves a file path through its rename history in `mr_file_changes`.
///
/// Runs a breadth-first search that follows rename edges in both directions:
/// forward (`old_path` -> `new_path`) and backward (`new_path` -> `old_path`).
/// Only rows of `project_id` whose `change_type` is `'renamed'` are followed.
///
/// `max_hops` limits the BFS depth (distance from the starting path), not the
/// number of paths discovered. With `max_hops == 0` no query is issued.
///
/// Returns every path reached, always including `path` itself, sorted for
/// determinism. A visited set keeps circular renames from looping forever.
///
/// # Errors
///
/// Fails if a statement cannot be prepared or executed, or if a result row
/// cannot be read. Row-level failures are propagated rather than skipped so
/// that a database problem never shows up as a silently shortened chain.
pub fn resolve_rename_chain(
    conn: &Connection,
    project_id: i64,
    path: &str,
    max_hops: usize,
) -> Result<Vec<String>> {
    if max_hops == 0 {
        return Ok(vec![path.to_string()]);
    }

    let forward_sql = "\
        SELECT DISTINCT mfc.new_path FROM mr_file_changes mfc \
        WHERE mfc.project_id = ?1 AND mfc.old_path = ?2 AND mfc.change_type = 'renamed'";
    let backward_sql = "\
        SELECT DISTINCT mfc.old_path FROM mr_file_changes mfc \
        WHERE mfc.project_id = ?1 AND mfc.new_path = ?2 AND mfc.change_type = 'renamed'";

    let mut visited: HashSet<String> = HashSet::new();
    visited.insert(path.to_string());

    // Each queue entry carries its own distance from the starting path.
    let mut queue: VecDeque<(String, usize)> = VecDeque::new();
    queue.push_back((path.to_string(), 0));

    while let Some((current, depth)) = queue.pop_front() {
        // A path sitting exactly at the depth limit stays in the result but is
        // not expanded any further.
        if depth >= max_hops {
            continue;
        }

        // Forward: current was the old name -> discover new names
        let forward = linked_paths(conn, forward_sql, project_id, &current)?;
        // Backward: current was the new name -> discover old names
        let backward = linked_paths(conn, backward_sql, project_id, &current)?;

        for discovered in forward.into_iter().chain(backward) {
            // `insert` returns false for an already-seen path, which is what
            // breaks cycles and de-duplicates diamond-shaped histories.
            if visited.insert(discovered.clone()) {
                queue.push_back((discovered, depth + 1));
            }
        }
    }

    let mut paths: Vec<String> = visited.into_iter().collect();
    paths.sort_unstable();
    Ok(paths)
}

/// Runs one single-column rename lookup for `path` and returns the paths found.
fn linked_paths(
    conn: &Connection,
    sql: &str,
    project_id: i64,
    path: &str,
) -> Result<Vec<String>> {
    let mut stmt = conn.prepare_cached(sql)?;
    let rows = stmt.query_map(rusqlite::params![project_id, path], |row| {
        row.get::<_, Option<String>>(0)
    })?;

    let mut found = Vec::new();
    for row in rows {
        // A NULL path carries no rename edge, so it is skipped; any genuine
        // read error is returned to the caller.
        if let Some(linked) = row? {
            found.push(linked);
        }
    }
    Ok(found)
}
// Unit tests are kept in the sibling file `file_history_tests.rs` and are
// compiled only for test builds.
#[cfg(test)]
#[path = "file_history_tests.rs"]
mod tests;

View File

@@ -0,0 +1,274 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Creates a connection for the `:memory:` path and runs the migrations on it.
fn setup_test_db() -> Connection {
    let db_path = Path::new(":memory:");
    let db = create_connection(db_path).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts project 1 and merge request 1 (owned by that project), then
/// returns the project id.
fn seed_project(conn: &Connection) -> i64 {
    let project_sql = "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)\n\
        VALUES (1, 100, 'group/repo', 'https://gitlab.example.com/group/repo', 1000, 2000)";
    let merge_request_sql = "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
        created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
        VALUES (1, 300, 5, 1, 'Rename MR', 'merged', 1000, 2000, 2000, 'feature', 'main')";

    conn.execute(project_sql, []).unwrap();
    conn.execute(merge_request_sql, []).unwrap();

    // The id of the project inserted above.
    1
}
/// Records a `'renamed'` row in `mr_file_changes` for project 1, linking
/// `old_path` to `new_path` under merge request `mr_id`.
fn insert_rename(conn: &Connection, mr_id: i64, old_path: &str, new_path: &str) {
    let sql = "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type)\n\
        VALUES (?1, 1, ?2, ?3, 'renamed')";
    conn.execute(sql, rusqlite::params![mr_id, old_path, new_path])
        .unwrap();
}
#[test]
fn test_no_renames_returns_original_path() {
    // No rename rows exist, so the chain is just the queried path.
    let db = setup_test_db();
    let pid = seed_project(&db);
    let chain = resolve_rename_chain(&db, pid, "src/auth.rs", 10).unwrap();
    assert_eq!(chain, vec![String::from("src/auth.rs")]);
}
#[test]
fn test_forward_chain() {
    // a.rs -> b.rs -> c.rs, starting from a.rs finds all three
    let conn = setup_test_db();
    let project_id = seed_project(&conn);
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
    // Need a second MR for the next rename
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/b.rs", "src/c.rs");
    // Asserted as returned, without re-sorting: the resolver documents sorted
    // output, and sorting here would hide a regression in that guarantee.
    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 10).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
#[test]
fn test_backward_chain() {
    // a.rs -> b.rs -> c.rs, starting from c.rs finds all three
    let conn = setup_test_db();
    let project_id = seed_project(&conn);
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
    // The second rename is attached to its own merge request.
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/b.rs", "src/c.rs");
    // Asserted as returned, without re-sorting: the resolver documents sorted
    // output, and sorting here would hide a regression in that guarantee.
    let result = resolve_rename_chain(&conn, project_id, "src/c.rs", 10).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
#[test]
fn test_cycle_detection() {
    // a -> b -> a: terminates without infinite loop
    let conn = setup_test_db();
    let project_id = seed_project(&conn);
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
    // The rename back to the original name lives in a second merge request.
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (2, 301, 6, 1, 'Rename back', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/b.rs", "src/a.rs");
    // Asserted as returned, without re-sorting: the resolver documents sorted
    // output, and sorting here would hide a regression in that guarantee.
    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 10).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs"]);
}
#[test]
fn test_max_hops_zero_returns_original() {
    // A rename exists, but a zero hop budget must not follow it.
    let db = setup_test_db();
    let pid = seed_project(&db);
    insert_rename(&db, 1, "src/a.rs", "src/b.rs");
    let chain = resolve_rename_chain(&db, pid, "src/a.rs", 0).unwrap();
    assert_eq!(chain, vec![String::from("src/a.rs")]);
}
#[test]
fn test_max_hops_bounded() {
    // Chain: a -> b -> c -> d -> e (4 hops)
    // With max_hops=2, should find exactly {a, b, c} (original + 2 depth levels)
    let conn = setup_test_db();
    let project_id = seed_project(&conn);
    let paths = ["src/a.rs", "src/b.rs", "src/c.rs", "src/d.rs", "src/e.rs"];
    // Count in i64 from the start so every bound value already has the SQL
    // integer type and no `as` casts (or lint allowances) are needed.
    for (i, window) in (0_i64..).zip(paths.windows(2)) {
        let mr_id = i + 1;
        // Merge request 1 is created by seed_project; only the later ones are
        // inserted here.
        if i > 0 {
            conn.execute(
                "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
                 created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
                 VALUES (?1, ?2, ?3, 1, 'MR', 'merged', ?4, ?5, ?5, 'feat', 'main')",
                rusqlite::params![mr_id, 300 + i, 5 + i, 1000 * mr_id, 2000 * mr_id],
            )
            .unwrap();
        }
        insert_rename(&conn, mr_id, window[0], window[1]);
    }
    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 2).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
    // Depth 1 should find only {a, b}
    let result1 = resolve_rename_chain(&conn, project_id, "src/a.rs", 1).unwrap();
    assert_eq!(result1, ["src/a.rs", "src/b.rs"]);
}
#[test]
fn test_diamond_pattern() {
    // Diamond-shaped history: a -> b, a -> c, b -> d, c -> d.
    // Two hops from a reach every node; one hop stops short of d.
    let db = setup_test_db();
    let pid = seed_project(&db);

    // Merge requests 2..=4; merge request 1 comes from seed_project.
    let extra_merge_requests = [
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (2, 301, 6, 1, 'MR 2', 'merged', 2000, 3000, 3000, 'feat2', 'main')",
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (3, 302, 7, 1, 'MR 3', 'merged', 3000, 4000, 4000, 'feat3', 'main')",
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (4, 303, 8, 1, 'MR 4', 'merged', 4000, 5000, 5000, 'feat4', 'main')",
    ];
    for &mr_sql in &extra_merge_requests {
        db.execute(mr_sql, []).unwrap();
    }

    // One rename per merge request: (mr_id, old_path, new_path).
    let edges: [(i64, &str, &str); 4] = [
        (1, "src/a.rs", "src/b.rs"),
        (2, "src/a.rs", "src/c.rs"),
        (3, "src/b.rs", "src/d.rs"),
        (4, "src/c.rs", "src/d.rs"),
    ];
    for &(mr_id, from, to) in &edges {
        insert_rename(&db, mr_id, from, to);
    }

    // max_hops=2: a(0) -> {b,c}(1) -> {d}(2), so all four are found.
    let two_hops = resolve_rename_chain(&db, pid, "src/a.rs", 2).unwrap();
    assert_eq!(two_hops, ["src/a.rs", "src/b.rs", "src/c.rs", "src/d.rs"]);

    // max_hops=1: a(0) -> {b,c}(1), so d at depth 2 is excluded.
    let one_hop = resolve_rename_chain(&db, pid, "src/a.rs", 1).unwrap();
    assert_eq!(one_hop, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
#[test]
fn test_branching_renames() {
    // a.rs was renamed to b.rs in one MR and c.rs in another
    let conn = setup_test_db();
    let project_id = seed_project(&conn);
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
    // The competing rename of the same file comes from a second merge request.
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/a.rs", "src/c.rs");
    // Asserted as returned, without re-sorting: the resolver documents sorted
    // output, and sorting here would hide a regression in that guarantee.
    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 10).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
#[test]
fn test_different_project_isolation() {
    // Renames in project 2 should not leak into project 1 queries
    let conn = setup_test_db();
    let _project_id = seed_project(&conn);
    // Create project 2
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)\n\
         VALUES (2, 200, 'other/repo', 'https://gitlab.example.com/other/repo', 1000, 2000)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)\n\
         VALUES (2, 301, 5, 2, 'Other MR', 'merged', 1000, 2000, 2000, 'feat', 'main')",
        [],
    )
    .unwrap();
    // Rename in project 1
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
    // Rename in project 2 (different mr_id and project_id); written by hand
    // because insert_rename always targets project 1.
    conn.execute(
        "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type)\n\
         VALUES (2, 2, 'src/a.rs', 'src/z.rs', 'renamed')",
        [],
    )
    .unwrap();
    // Both results are asserted as returned, without re-sorting: the resolver
    // documents sorted output, and sorting here would hide a regression.
    // Query project 1 -- should NOT see z.rs
    let result = resolve_rename_chain(&conn, 1, "src/a.rs", 10).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs"]);
    // Query project 2 -- should NOT see b.rs
    let result2 = resolve_rename_chain(&conn, 2, "src/a.rs", 10).unwrap();
    assert_eq!(result2, ["src/a.rs", "src/z.rs"]);
}

View File

@@ -4,6 +4,7 @@ pub mod db;
pub mod dependent_queue; pub mod dependent_queue;
pub mod error; pub mod error;
pub mod events_db; pub mod events_db;
pub mod file_history;
pub mod lock; pub mod lock;
pub mod logging; pub mod logging;
pub mod metrics; pub mod metrics;