feat(core): add file rename chain resolver with depth-bounded BFS
New module: core::file_history with resolve_rename_chain() that traces a file path
through its rename history in mr_file_changes using bidirectional BFS
(forward: old_path -> new_path, backward: new_path -> old_path).

Key design decisions:
- Depth-bounded BFS: each queue entry carries its distance from the origin, so
  max_hops correctly limits by graph distance (not by total nodes discovered).
  This matters for branching rename graphs where a file was renamed differently
  in parallel MRs.
- Cycle-safe: a visited set prevents infinite loops from circular renames.
- Project-scoped: queries are always scoped to a single project_id.
- Deterministic: output is sorted for stable results.

Tests cover: linear chains (forward/backward), cycles, max_hops=0, depth-bounded
linear chains, branching renames, diamond patterns, and cross-project isolation
(9 tests total).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
71
src/core/file_history.rs
Normal file
71
src/core/file_history.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use rusqlite::Connection;
|
||||
|
||||
use super::error::Result;
|
||||
|
||||
/// Resolves a file path through its rename history in `mr_file_changes`.
|
||||
///
|
||||
/// BFS in both directions: forward (`old_path` -> `new_path`) and backward
|
||||
/// (`new_path` -> `old_path`). Returns all equivalent paths including the
|
||||
/// original, sorted for determinism. Cycles are detected via a visited set.
|
||||
///
|
||||
/// `max_hops` limits the BFS depth (distance from the starting path).
|
||||
pub fn resolve_rename_chain(
|
||||
conn: &Connection,
|
||||
project_id: i64,
|
||||
path: &str,
|
||||
max_hops: usize,
|
||||
) -> Result<Vec<String>> {
|
||||
let mut visited: HashSet<String> = HashSet::new();
|
||||
visited.insert(path.to_string());
|
||||
|
||||
if max_hops == 0 {
|
||||
return Ok(vec![path.to_string()]);
|
||||
}
|
||||
|
||||
let mut queue: VecDeque<(String, usize)> = VecDeque::new();
|
||||
queue.push_back((path.to_string(), 0));
|
||||
|
||||
let forward_sql = "\
|
||||
SELECT DISTINCT mfc.new_path FROM mr_file_changes mfc \
|
||||
WHERE mfc.project_id = ?1 AND mfc.old_path = ?2 AND mfc.change_type = 'renamed'";
|
||||
let backward_sql = "\
|
||||
SELECT DISTINCT mfc.old_path FROM mr_file_changes mfc \
|
||||
WHERE mfc.project_id = ?1 AND mfc.new_path = ?2 AND mfc.change_type = 'renamed'";
|
||||
|
||||
while let Some((current, depth)) = queue.pop_front() {
|
||||
if depth >= max_hops {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Forward: current was the old name -> discover new names
|
||||
let mut fwd_stmt = conn.prepare_cached(forward_sql)?;
|
||||
let forward: Vec<String> = fwd_stmt
|
||||
.query_map(rusqlite::params![project_id, ¤t], |row| row.get(0))?
|
||||
.filter_map(std::result::Result::ok)
|
||||
.collect();
|
||||
|
||||
// Backward: current was the new name -> discover old names
|
||||
let mut bwd_stmt = conn.prepare_cached(backward_sql)?;
|
||||
let backward: Vec<String> = bwd_stmt
|
||||
.query_map(rusqlite::params![project_id, ¤t], |row| row.get(0))?
|
||||
.filter_map(std::result::Result::ok)
|
||||
.collect();
|
||||
|
||||
for discovered in forward.into_iter().chain(backward) {
|
||||
if visited.insert(discovered.clone()) {
|
||||
queue.push_back((discovered, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut paths: Vec<String> = visited.into_iter().collect();
|
||||
paths.sort();
|
||||
Ok(paths)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "file_history_tests.rs"]
|
||||
mod tests;
|
||||
274
src/core/file_history_tests.rs
Normal file
274
src/core/file_history_tests.rs
Normal file
@@ -0,0 +1,274 @@
|
||||
use super::*;
|
||||
use crate::core::db::{create_connection, run_migrations};
|
||||
use std::path::Path;
|
||||
|
||||
fn setup_test_db() -> Connection {
|
||||
let conn = create_connection(Path::new(":memory:")).unwrap();
|
||||
run_migrations(&conn).unwrap();
|
||||
conn
|
||||
}
|
||||
|
||||
fn seed_project(conn: &Connection) -> i64 {
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
|
||||
VALUES (1, 100, 'group/repo', 'https://gitlab.example.com/group/repo', 1000, 2000)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
|
||||
created_at, updated_at, last_seen_at, source_branch, target_branch)
|
||||
VALUES (1, 300, 5, 1, 'Rename MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
1 // project_id
|
||||
}
|
||||
|
||||
fn insert_rename(conn: &Connection, mr_id: i64, old_path: &str, new_path: &str) {
|
||||
conn.execute(
|
||||
"INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type)
|
||||
VALUES (?1, 1, ?2, ?3, 'renamed')",
|
||||
rusqlite::params![mr_id, old_path, new_path],
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
fn test_no_renames_returns_original_path() {
    // With no rename rows at all, only the queried path itself comes back.
    let conn = setup_test_db();
    let pid = seed_project(&conn);

    let resolved = resolve_rename_chain(&conn, pid, "src/auth.rs", 10).unwrap();
    assert_eq!(resolved, ["src/auth.rs"]);
}
|
||||
|
||||
#[test]
fn test_forward_chain() {
    // Chain a.rs -> b.rs -> c.rs; starting at a.rs surfaces the whole chain.
    let conn = setup_test_db();
    let pid = seed_project(&conn);

    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");

    // The second hop lives in its own MR.
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)
         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/b.rs", "src/c.rs");

    let mut resolved = resolve_rename_chain(&conn, pid, "src/a.rs", 10).unwrap();
    resolved.sort();
    assert_eq!(resolved, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
|
||||
|
||||
#[test]
fn test_backward_chain() {
    // Chain a.rs -> b.rs -> c.rs; starting at the FINAL name c.rs should
    // walk the chain backwards and still surface all three.
    let conn = setup_test_db();
    let pid = seed_project(&conn);

    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");

    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)
         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/b.rs", "src/c.rs");

    let mut resolved = resolve_rename_chain(&conn, pid, "src/c.rs", 10).unwrap();
    resolved.sort();
    assert_eq!(resolved, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
|
||||
|
||||
#[test]
fn test_cycle_detection() {
    // a -> b followed by b -> a: traversal must terminate, not loop forever.
    let conn = setup_test_db();
    let pid = seed_project(&conn);

    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");

    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)
         VALUES (2, 301, 6, 1, 'Rename back', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/b.rs", "src/a.rs");

    let mut resolved = resolve_rename_chain(&conn, pid, "src/a.rs", 10).unwrap();
    resolved.sort();
    assert_eq!(resolved, ["src/a.rs", "src/b.rs"]);
}
|
||||
|
||||
#[test]
fn test_max_hops_zero_returns_original() {
    // A zero hop budget short-circuits: the rename row is never followed.
    let conn = setup_test_db();
    let pid = seed_project(&conn);

    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");

    assert_eq!(
        resolve_rename_chain(&conn, pid, "src/a.rs", 0).unwrap(),
        ["src/a.rs"]
    );
}
|
||||
|
||||
#[test]
fn test_max_hops_bounded() {
    // Chain: a -> b -> c -> d -> e (4 hops).
    // With max_hops=2, should find exactly {a, b, c} (original + 2 depth levels).
    let conn = setup_test_db();
    let project_id = seed_project(&conn);

    let paths = ["src/a.rs", "src/b.rs", "src/c.rs", "src/d.rs", "src/e.rs"];
    for (i, window) in paths.windows(2).enumerate() {
        // Checked conversions instead of `as` casts: the original put a
        // clippy allow on only the last statement while identical casts in
        // the execute() params went uncovered. i is tiny, so unwrap is safe.
        let idx = i64::try_from(i).unwrap();
        let mr_id = idx + 1;
        // seed_project already created MR 1; later hops need their own MRs.
        if i > 0 {
            conn.execute(
                "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
                 created_at, updated_at, last_seen_at, source_branch, target_branch)
                 VALUES (?1, ?2, ?3, 1, 'MR', 'merged', ?4, ?5, ?5, 'feat', 'main')",
                rusqlite::params![
                    mr_id,
                    300 + idx,
                    5 + idx,
                    1000 * (idx + 1),
                    2000 * (idx + 1),
                ],
            )
            .unwrap();
        }
        insert_rename(&conn, mr_id, window[0], window[1]);
    }

    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 2).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs"]);

    // Depth 1 should find only {a, b}.
    let result1 = resolve_rename_chain(&conn, project_id, "src/a.rs", 1).unwrap();
    assert_eq!(result1, ["src/a.rs", "src/b.rs"]);
}
|
||||
|
||||
#[test]
fn test_diamond_pattern() {
    // Diamond: a -> b, a -> c, b -> d, c -> d.
    // From a with max_hops=2, should find all four: {a, b, c, d}.
    let conn = setup_test_db();
    let project_id = seed_project(&conn);

    // Local helper replacing three copy-pasted INSERT statements: each extra
    // rename needs its own MR row. `ts` is created_at; updated_at and
    // last_seen_at are ts + 1000, matching the original fixture values.
    let insert_mr = |id: i64, gitlab_id: i64, iid: i64, title: &str, ts: i64| {
        conn.execute(
            "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
             created_at, updated_at, last_seen_at, source_branch, target_branch)
             VALUES (?1, ?2, ?3, 1, ?4, 'merged', ?5, ?6, ?6, ?7, 'main')",
            rusqlite::params![id, gitlab_id, iid, title, ts, ts + 1000, format!("feat{id}")],
        )
        .unwrap();
    };

    // MR 1 (seeded): a -> b
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");

    // MR 2: a -> c
    insert_mr(2, 301, 6, "MR 2", 2000);
    insert_rename(&conn, 2, "src/a.rs", "src/c.rs");

    // MR 3: b -> d
    insert_mr(3, 302, 7, "MR 3", 3000);
    insert_rename(&conn, 3, "src/b.rs", "src/d.rs");

    // MR 4: c -> d
    insert_mr(4, 303, 8, "MR 4", 4000);
    insert_rename(&conn, 4, "src/c.rs", "src/d.rs");

    // max_hops=2: a(0) -> {b,c}(1) -> {d}(2) — all four found
    let result = resolve_rename_chain(&conn, project_id, "src/a.rs", 2).unwrap();
    assert_eq!(result, ["src/a.rs", "src/b.rs", "src/c.rs", "src/d.rs"]);

    // max_hops=1: a(0) -> {b,c}(1) — d at depth 2 excluded
    let result1 = resolve_rename_chain(&conn, project_id, "src/a.rs", 1).unwrap();
    assert_eq!(result1, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
|
||||
|
||||
#[test]
fn test_branching_renames() {
    // The same origin file was renamed two different ways in parallel MRs;
    // both targets should be discovered from the origin.
    let conn = setup_test_db();
    let pid = seed_project(&conn);

    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");

    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)
         VALUES (2, 301, 6, 1, 'Rename MR 2', 'merged', 3000, 4000, 4000, 'feature2', 'main')",
        [],
    )
    .unwrap();
    insert_rename(&conn, 2, "src/a.rs", "src/c.rs");

    let mut resolved = resolve_rename_chain(&conn, pid, "src/a.rs", 10).unwrap();
    resolved.sort();
    assert_eq!(resolved, ["src/a.rs", "src/b.rs", "src/c.rs"]);
}
|
||||
|
||||
#[test]
fn test_different_project_isolation() {
    // Renames recorded under project 2 must never leak into project 1 results,
    // and vice versa.
    let conn = setup_test_db();
    seed_project(&conn);

    // A second project with its own merged MR.
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (2, 200, 'other/repo', 'https://gitlab.example.com/other/repo', 1000, 2000)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)
         VALUES (2, 301, 5, 2, 'Other MR', 'merged', 1000, 2000, 2000, 'feat', 'main')",
        [],
    )
    .unwrap();

    // Same old_path renamed differently in each project.
    insert_rename(&conn, 1, "src/a.rs", "src/b.rs");
    conn.execute(
        "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type)
         VALUES (2, 2, 'src/a.rs', 'src/z.rs', 'renamed')",
        [],
    )
    .unwrap();

    // Project 1 must not see z.rs.
    let mut from_p1 = resolve_rename_chain(&conn, 1, "src/a.rs", 10).unwrap();
    from_p1.sort();
    assert_eq!(from_p1, ["src/a.rs", "src/b.rs"]);

    // Project 2 must not see b.rs.
    let mut from_p2 = resolve_rename_chain(&conn, 2, "src/a.rs", 10).unwrap();
    from_p2.sort();
    assert_eq!(from_p2, ["src/a.rs", "src/z.rs"]);
}
|
||||
@@ -4,6 +4,7 @@ pub mod db;
|
||||
pub mod dependent_queue;
|
||||
pub mod error;
|
||||
pub mod events_db;
|
||||
pub mod file_history;
|
||||
pub mod lock;
|
||||
pub mod logging;
|
||||
pub mod metrics;
|
||||
|
||||
Reference in New Issue
Block a user