feat(who): expand expert + overlap queries with mr_file_changes and mr_reviewers

Chain: bd-jec (config flag) -> bd-2yo (fetch MR diffs) -> bd-3qn6 (rewrite who queries)

- Add fetch_mr_file_changes config option and --no-file-changes CLI flag
- Add GitLab MR diffs API fetch pipeline with watermark-based sync
- Create migration 020 for diffs_synced_for_updated_at watermark column
- Rewrite query_expert() and query_overlap() to use 4-signal UNION ALL:
  DiffNote reviewers, DiffNote MR authors, file-change authors, file-change reviewers
- Deduplicate across signal types via COUNT(DISTINCT CASE WHEN ... THEN mr_id END)
- Add insert_file_change test helper, 8 new who tests, all 397 tests pass
- Also includes: list performance migration 019, autocorrect module, README updates

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-08 13:35:14 -05:00
parent 435a208c93
commit 95b7183add
19 changed files with 2139 additions and 291 deletions

268
src/ingestion/mr_diffs.rs Normal file
View File

@@ -0,0 +1,268 @@
use rusqlite::Connection;
use tracing::debug;
use crate::core::error::Result;
use crate::gitlab::types::GitLabMrDiff;
/// Map the boolean flags of a GitLab MR diff entry to a change-type label.
///
/// The flags are evaluated in a fixed priority order: `new_file`, then
/// `renamed_file`, then `deleted_file`. If more than one flag is set the
/// earliest one in that order wins; if none is set the entry is reported as
/// `"modified"`.
fn derive_change_type(diff: &GitLabMrDiff) -> &'static str {
    match (diff.new_file, diff.renamed_file, diff.deleted_file) {
        (true, _, _) => "added",
        (false, true, _) => "renamed",
        (false, false, true) => "deleted",
        (false, false, false) => "modified",
    }
}
/// Overwrite the stored file-change rows of one merge request.
///
/// Every existing `mr_file_changes` row whose `merge_request_id` equals
/// `mr_local_id` is deleted first; then one row is inserted per entry in
/// `diffs`. `old_path` is written only for entries flagged as renamed, every
/// other entry stores NULL in that column. The `change_type` column is filled
/// from `derive_change_type`.
///
/// Returns the number of rows inserted, which is `diffs.len()` on success.
///
/// # Errors
///
/// A failure of the DELETE, of statement preparation, or of any INSERT is
/// propagated immediately. No transaction is opened here, so work done before
/// the failing statement is only undone if the caller runs this inside one.
pub fn upsert_mr_file_changes(
    conn: &Connection,
    mr_local_id: i64,
    project_id: i64,
    diffs: &[GitLabMrDiff],
) -> Result<usize> {
    // Clear the previous snapshot so the table ends up mirroring `diffs`.
    conn.execute(
        "DELETE FROM mr_file_changes WHERE merge_request_id = ?1",
        [mr_local_id],
    )?;

    let mut insert_row = conn.prepare_cached(
        "INSERT INTO mr_file_changes (merge_request_id, project_id, old_path, new_path, change_type) \
         VALUES (?1, ?2, ?3, ?4, ?5)",
    )?;

    for entry in diffs {
        // The previous path is only recorded when the file was renamed.
        let previous_path = entry.renamed_file.then(|| entry.old_path.as_str());
        let kind = derive_change_type(entry);
        insert_row.execute(rusqlite::params![
            mr_local_id,
            project_id,
            previous_path,
            entry.new_path,
            kind,
        ])?;
    }

    // Reaching this point means every entry was inserted exactly once.
    let inserted = diffs.len();
    if inserted > 0 {
        debug!(inserted, mr_local_id, "Stored MR file changes");
    }
    Ok(inserted)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::db::{create_connection, run_migrations};
    use std::path::Path;

    /// Flag triples in `(new_file, renamed_file, deleted_file)` order.
    const ADDED: (bool, bool, bool) = (true, false, false);
    const RENAMED: (bool, bool, bool) = (false, true, false);
    const DELETED: (bool, bool, bool) = (false, false, true);
    const MODIFIED: (bool, bool, bool) = (false, false, false);

    /// Open an in-memory database, run the migrations, and seed one project
    /// and one merge request for the tests to attach file changes to.
    fn setup() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();

        // Seed project row.
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
            [],
        )
        .unwrap();

        // Seed merge request row belonging to that project.
        conn.execute(
            "INSERT INTO merge_requests (gitlab_id, iid, project_id, title, state, draft, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) \
             VALUES (100, 1, 1, 'Test MR', 'merged', 0, 'feature', 'main', 'testuser', 1000, 2000, 3000)",
            [],
        )
        .unwrap();

        conn
    }

    /// Build a diff entry from its two paths and a flag triple.
    fn diff_entry(old_path: &str, new_path: &str, flags: (bool, bool, bool)) -> GitLabMrDiff {
        let (new_file, renamed_file, deleted_file) = flags;
        GitLabMrDiff {
            old_path: old_path.to_string(),
            new_path: new_path.to_string(),
            new_file,
            renamed_file,
            deleted_file,
        }
    }

    /// Run a parameterless single-value `COUNT` query and return its result.
    fn count_rows(conn: &Connection, sql: &str) -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).unwrap()
    }

    #[test]
    fn test_derive_change_type_added() {
        let entry = diff_entry("", "src/new.rs", ADDED);
        assert_eq!(derive_change_type(&entry), "added");
    }

    #[test]
    fn test_derive_change_type_renamed() {
        let entry = diff_entry("src/old.rs", "src/new.rs", RENAMED);
        assert_eq!(derive_change_type(&entry), "renamed");
    }

    #[test]
    fn test_derive_change_type_deleted() {
        let entry = diff_entry("src/gone.rs", "src/gone.rs", DELETED);
        assert_eq!(derive_change_type(&entry), "deleted");
    }

    #[test]
    fn test_derive_change_type_modified() {
        let entry = diff_entry("src/lib.rs", "src/lib.rs", MODIFIED);
        assert_eq!(derive_change_type(&entry), "modified");
    }

    #[test]
    fn test_upsert_inserts_file_changes() {
        let conn = setup();
        let batch = [
            diff_entry("", "src/new.rs", ADDED),
            diff_entry("src/lib.rs", "src/lib.rs", MODIFIED),
        ];

        let inserted = upsert_mr_file_changes(&conn, 1, 1, &batch).unwrap();
        assert_eq!(inserted, 2);

        let stored = count_rows(
            &conn,
            "SELECT COUNT(*) FROM mr_file_changes WHERE merge_request_id = 1",
        );
        assert_eq!(stored, 2);
    }

    #[test]
    fn test_upsert_replaces_existing() {
        let conn = setup();

        // First snapshot: a single added file.
        let first = [diff_entry("", "src/old.rs", ADDED)];
        upsert_mr_file_changes(&conn, 1, 1, &first).unwrap();

        // Second snapshot for the same MR: two different, modified files.
        let second = [
            diff_entry("src/a.rs", "src/a.rs", MODIFIED),
            diff_entry("src/b.rs", "src/b.rs", MODIFIED),
        ];
        let inserted = upsert_mr_file_changes(&conn, 1, 1, &second).unwrap();
        assert_eq!(inserted, 2);

        let stored = count_rows(
            &conn,
            "SELECT COUNT(*) FROM mr_file_changes WHERE merge_request_id = 1",
        );
        assert_eq!(stored, 2);

        // The row from the first snapshot must no longer exist.
        let leftovers = count_rows(
            &conn,
            "SELECT COUNT(*) FROM mr_file_changes WHERE new_path = 'src/old.rs'",
        );
        assert_eq!(leftovers, 0);
    }

    #[test]
    fn test_renamed_stores_old_path() {
        let conn = setup();
        let batch = [diff_entry("src/old_name.rs", "src/new_name.rs", RENAMED)];
        upsert_mr_file_changes(&conn, 1, 1, &batch).unwrap();

        let (stored_old, stored_kind): (Option<String>, String) = conn
            .query_row(
                "SELECT old_path, change_type FROM mr_file_changes WHERE new_path = 'src/new_name.rs'",
                [],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap();

        assert_eq!(stored_old.as_deref(), Some("src/old_name.rs"));
        assert_eq!(stored_kind, "renamed");
    }

    #[test]
    fn test_non_renamed_has_null_old_path() {
        let conn = setup();
        let batch = [diff_entry("src/lib.rs", "src/lib.rs", MODIFIED)];
        upsert_mr_file_changes(&conn, 1, 1, &batch).unwrap();

        let stored_old: Option<String> = conn
            .query_row(
                "SELECT old_path FROM mr_file_changes WHERE new_path = 'src/lib.rs'",
                [],
                |row| row.get(0),
            )
            .unwrap();

        assert!(stored_old.is_none());
    }
}