refactor(core): extract path_resolver module, fix old_path matching in who

Extract shared path resolution logic from who.rs into a new
core::path_resolver module for cross-module reuse. Functions moved:
escape_like, normalize_repo_path, PathQuery, SuffixResult,
build_path_query, suffix_probe. Duplicate escape_like copies removed
from list.rs, project.rs, and filters.rs — all now import from
path_resolver.

Additionally fixes two bugs in query_expert_details() and
query_overlap() where only position_new_path was checked (missing
old_path matches for renamed files) and state filter excluded 'closed'
MRs despite the main scoring query including them with a decay
multiplier.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-13 12:00:44 -05:00
parent 8c86b0dfd7
commit e6771709f1
7 changed files with 580 additions and 281 deletions

View File

@@ -9,6 +9,7 @@ pub mod lock;
pub mod logging;
pub mod metrics;
pub mod note_parser;
pub mod path_resolver;
pub mod paths;
pub mod payloads;
pub mod project;

244
src/core/path_resolver.rs Normal file
View File

@@ -0,0 +1,244 @@
use rusqlite::Connection;
use super::error::{LoreError, Result};
// ─── SQL Helpers ─────────────────────────────────────────────────────────────
/// Escape LIKE metacharacters (`%`, `_`, `\`).
/// All queries using this must include `ESCAPE '\'`.
pub fn escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
/// Normalize user-supplied repo paths to match stored DiffNote / file-change paths.
/// - trims whitespace
/// - strips leading "./" and "/" (repo-relative paths)
/// - converts '\' to '/' when no '/' present (Windows paste)
/// - collapses repeated "//"
pub fn normalize_repo_path(input: &str) -> String {
let mut s = input.trim().to_string();
// Windows backslash normalization (only when no forward slashes present)
if s.contains('\\') && !s.contains('/') {
s = s.replace('\\', "/");
}
// Strip leading ./
while s.starts_with("./") {
s = s[2..].to_string();
}
// Strip leading /
s = s.trim_start_matches('/').to_string();
// Collapse repeated //
while s.contains("//") {
s = s.replace("//", "/");
}
s
}
// ─── Path Query Resolution ──────────────────────────────────────────────────
/// Describes how to match a user-supplied path in SQL.
#[derive(Debug)]
pub struct PathQuery {
/// The parameter value to bind.
pub value: String,
/// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`.
pub is_prefix: bool,
}
/// Result of a suffix probe against the DB.
pub enum SuffixResult {
/// Suffix probe was not attempted (conditions not met).
NotAttempted,
/// No paths matched the suffix.
NoMatch,
/// Exactly one distinct path matched — auto-resolve.
Unique(String),
/// Multiple distinct paths matched — user must disambiguate.
Ambiguous(Vec<String>),
}
/// Build a path query from a user-supplied path, with project-scoped DB probes.
///
/// Resolution strategy (in priority order):
/// 1. Trailing `/` → directory prefix (LIKE `path/%`)
/// 2. Exact match probe against notes + `mr_file_changes` → exact (= `path`)
/// 3. Directory prefix probe → prefix (LIKE `path/%`)
/// 4. Suffix probe for bare filenames → auto-resolve or ambiguity error
/// 5. Heuristic fallback: `.` in last segment → file, else → directory prefix
pub fn build_path_query(
conn: &Connection,
path: &str,
project_id: Option<i64>,
) -> Result<PathQuery> {
let trimmed = path.trim_end_matches('/');
let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
let is_root = !trimmed.contains('/');
let forced_dir = path.ends_with('/');
// Heuristic is now only a fallback; probes decide first when ambiguous.
let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
// Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped)
// Checks both new_path and old_path to support querying renamed files.
let exact_exists = conn
.query_row(
"SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path = ?1 OR position_old_path = ?1)
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![trimmed, project_id],
|_| Ok(()),
)
.is_ok()
|| conn
.query_row(
"SELECT 1 FROM mr_file_changes
WHERE (new_path = ?1 OR old_path = ?1)
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![trimmed, project_id],
|_| Ok(()),
)
.is_ok();
// Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped)
let prefix_exists = if !forced_dir && !exact_exists {
let escaped = escape_like(trimmed);
let pat = format!("{escaped}/%");
conn.query_row(
"SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\')
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![pat, project_id],
|_| Ok(()),
)
.is_ok()
|| conn
.query_row(
"SELECT 1 FROM mr_file_changes
WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\')
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![pat, project_id],
|_| Ok(()),
)
.is_ok()
} else {
false
};
// Probe 3: suffix match — user typed a bare filename or partial path that
// doesn't exist as-is. Search for full paths ending with /input (or equal to input).
// This handles "login.rs" matching "src/auth/login.rs".
let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file {
suffix_probe(conn, trimmed, project_id)?
} else {
SuffixResult::NotAttempted
};
match suffix_resolved {
SuffixResult::Unique(full_path) => Ok(PathQuery {
value: full_path,
is_prefix: false,
}),
SuffixResult::Ambiguous(candidates) => {
let list = candidates
.iter()
.map(|p| format!(" {p}"))
.collect::<Vec<_>>()
.join("\n");
Err(LoreError::Ambiguous(format!(
"'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}"
)))
}
SuffixResult::NotAttempted | SuffixResult::NoMatch => {
// Original logic: exact > prefix > heuristic
let is_file = if forced_dir {
false
} else if exact_exists {
true
} else if prefix_exists {
false
} else {
looks_like_file
};
if is_file {
Ok(PathQuery {
value: trimmed.to_string(),
is_prefix: false,
})
} else {
let escaped = escape_like(trimmed);
Ok(PathQuery {
value: format!("{escaped}/%"),
is_prefix: true,
})
}
}
}
}
/// Probe both notes and mr_file_changes for paths ending with the given suffix.
/// Searches both new_path and old_path columns to support renamed file resolution.
/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list).
pub fn suffix_probe(
conn: &Connection,
suffix: &str,
project_id: Option<i64>,
) -> Result<SuffixResult> {
let escaped = escape_like(suffix);
let suffix_pat = format!("%/{escaped}");
let mut stmt = conn.prepare_cached(
"SELECT DISTINCT full_path FROM (
SELECT position_new_path AS full_path
FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT new_path AS full_path FROM mr_file_changes
WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT position_old_path AS full_path FROM notes
WHERE note_type = 'DiffNote'
AND is_system = 0
AND position_old_path IS NOT NULL
AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT old_path AS full_path FROM mr_file_changes
WHERE old_path IS NOT NULL
AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
)
ORDER BY full_path
LIMIT 11",
)?;
let candidates: Vec<String> = stmt
.query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| {
row.get(0)
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
match candidates.len() {
0 => Ok(SuffixResult::NoMatch),
1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())),
_ => Ok(SuffixResult::Ambiguous(candidates)),
}
}
#[cfg(test)]
#[path = "path_resolver_tests.rs"]
mod tests;

View File

@@ -0,0 +1,290 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
fn setup_test_db() -> Connection {
let conn = create_connection(Path::new(":memory:")).unwrap();
run_migrations(&conn).unwrap();
conn
}
fn seed_project(conn: &Connection, id: i64) {
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (?1, ?1, 'group/repo', 'https://gl.example.com/group/repo', 1000, 2000)",
rusqlite::params![id],
)
.unwrap();
}
fn seed_mr(conn: &Connection, mr_id: i64, project_id: i64) {
conn.execute(
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
created_at, updated_at, last_seen_at, source_branch, target_branch)
VALUES (?1, ?1, ?1, ?2, 'MR', 'merged', 1000, 2000, 2000, 'feat', 'main')",
rusqlite::params![mr_id, project_id],
)
.unwrap();
}
fn seed_file_change(conn: &Connection, mr_id: i64, project_id: i64, path: &str) {
conn.execute(
"INSERT INTO mr_file_changes (merge_request_id, project_id, new_path, change_type)
VALUES (?1, ?2, ?3, 'modified')",
rusqlite::params![mr_id, project_id, path],
)
.unwrap();
}
fn seed_diffnote(conn: &Connection, id: i64, project_id: i64, path: &str) {
// Need a discussion first (MergeRequest type, linked to mr_id=1)
conn.execute(
"INSERT OR IGNORE INTO discussions (id, gitlab_discussion_id, project_id, \
merge_request_id, noteable_type, resolvable, resolved, last_seen_at, last_note_at)
VALUES (?1, ?2, ?3, 1, 'MergeRequest', 1, 0, 2000, 2000)",
rusqlite::params![id, format!("disc-{id}"), project_id],
)
.unwrap();
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, note_type, is_system, \
author_username, body, created_at, updated_at, last_seen_at, position_new_path)
VALUES (?1, ?1, ?1, ?2, 'DiffNote', 0, 'user', 'note', 1000, 2000, 2000, ?3)",
rusqlite::params![id, project_id, path],
)
.unwrap();
}
// ─── escape_like ─────────────────────────────────────────────────────────────
#[test]
fn test_escape_like() {
assert_eq!(escape_like("normal/path"), "normal/path");
assert_eq!(escape_like("has_underscore"), "has\\_underscore");
assert_eq!(escape_like("has%percent"), "has\\%percent");
assert_eq!(escape_like("has\\backslash"), "has\\\\backslash");
}
// ─── normalize_repo_path ─────────────────────────────────────────────────────
#[test]
fn test_normalize_repo_path() {
assert_eq!(normalize_repo_path("./src/foo/"), "src/foo/");
assert_eq!(normalize_repo_path("/src/foo/"), "src/foo/");
assert_eq!(normalize_repo_path("././src/foo"), "src/foo");
assert_eq!(normalize_repo_path("src\\foo\\bar.rs"), "src/foo/bar.rs");
assert_eq!(normalize_repo_path("src/foo\\bar"), "src/foo\\bar");
assert_eq!(normalize_repo_path("src//foo//bar/"), "src/foo/bar/");
assert_eq!(normalize_repo_path(" src/foo/ "), "src/foo/");
assert_eq!(normalize_repo_path("src/foo/bar.rs"), "src/foo/bar.rs");
assert_eq!(normalize_repo_path(""), "");
}
// ─── build_path_query heuristics (no DB data) ──────────────────────────────
#[test]
fn test_trailing_slash_is_prefix() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "src/auth/", None).unwrap();
assert_eq!(pq.value, "src/auth/%");
assert!(pq.is_prefix);
}
#[test]
fn test_no_dot_in_last_segment_is_prefix() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "src/auth", None).unwrap();
assert_eq!(pq.value, "src/auth/%");
assert!(pq.is_prefix);
}
#[test]
fn test_file_extension_is_exact() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "src/auth/login.rs", None).unwrap();
assert_eq!(pq.value, "src/auth/login.rs");
assert!(!pq.is_prefix);
}
#[test]
fn test_root_file_is_exact() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "README.md", None).unwrap();
assert_eq!(pq.value, "README.md");
assert!(!pq.is_prefix);
}
#[test]
fn test_dotless_root_file_is_exact() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "Makefile", None).unwrap();
assert_eq!(pq.value, "Makefile");
assert!(!pq.is_prefix);
let pq = build_path_query(&conn, "LICENSE", None).unwrap();
assert_eq!(pq.value, "LICENSE");
assert!(!pq.is_prefix);
}
#[test]
fn test_metacharacters_escaped_in_prefix() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "src/test_files/", None).unwrap();
assert_eq!(pq.value, "src/test\\_files/%");
assert!(pq.is_prefix);
}
#[test]
fn test_exact_value_not_escaped() {
let conn = setup_test_db();
let pq = build_path_query(&conn, "README_with_underscore.md", None).unwrap();
assert_eq!(pq.value, "README_with_underscore.md");
assert!(!pq.is_prefix);
}
// ─── build_path_query DB probes ─────────────────────────────────────────────
#[test]
fn test_db_probe_detects_dotless_file() {
// "src/Dockerfile" has no dot in last segment -> normally prefix.
// DB probe detects it's actually a file.
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_diffnote(&conn, 1, 1, "src/Dockerfile");
let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap();
assert_eq!(pq.value, "src/Dockerfile");
assert!(!pq.is_prefix);
// Without DB data -> falls through to prefix
let empty = setup_test_db();
let pq2 = build_path_query(&empty, "src/Dockerfile", None).unwrap();
assert!(pq2.is_prefix);
}
#[test]
fn test_db_probe_via_file_changes() {
// Exact match via mr_file_changes even without notes
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_file_change(&conn, 1, 1, "src/Dockerfile");
let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap();
assert_eq!(pq.value, "src/Dockerfile");
assert!(!pq.is_prefix);
}
#[test]
fn test_db_probe_project_scoped() {
let conn = setup_test_db();
seed_project(&conn, 1);
seed_project(&conn, 2);
seed_mr(&conn, 1, 1);
seed_diffnote(&conn, 1, 1, "infra/Makefile");
// Unscoped: finds it
assert!(
!build_path_query(&conn, "infra/Makefile", None)
.unwrap()
.is_prefix
);
// Scoped to project 1: finds it
assert!(
!build_path_query(&conn, "infra/Makefile", Some(1))
.unwrap()
.is_prefix
);
// Scoped to project 2: no data -> prefix
assert!(
build_path_query(&conn, "infra/Makefile", Some(2))
.unwrap()
.is_prefix
);
}
// ─── suffix resolution ──────────────────────────────────────────────────────
#[test]
fn test_suffix_resolves_bare_filename() {
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_file_change(&conn, 1, 1, "src/auth/login.rs");
let pq = build_path_query(&conn, "login.rs", None).unwrap();
assert_eq!(pq.value, "src/auth/login.rs");
assert!(!pq.is_prefix);
}
#[test]
fn test_suffix_resolves_partial_path() {
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_file_change(&conn, 1, 1, "src/auth/login.rs");
let pq = build_path_query(&conn, "auth/login.rs", None).unwrap();
assert_eq!(pq.value, "src/auth/login.rs");
assert!(!pq.is_prefix);
}
#[test]
fn test_suffix_ambiguous_returns_error() {
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_file_change(&conn, 1, 1, "src/auth/utils.rs");
seed_file_change(&conn, 1, 1, "src/db/utils.rs");
let err = build_path_query(&conn, "utils.rs", None).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("src/auth/utils.rs"), "candidates: {msg}");
assert!(msg.contains("src/db/utils.rs"), "candidates: {msg}");
}
#[test]
fn test_suffix_scoped_to_project() {
let conn = setup_test_db();
seed_project(&conn, 1);
seed_project(&conn, 2);
seed_mr(&conn, 1, 1);
seed_mr(&conn, 2, 2);
seed_file_change(&conn, 1, 1, "src/utils.rs");
seed_file_change(&conn, 2, 2, "lib/utils.rs");
// Unscoped: ambiguous
assert!(build_path_query(&conn, "utils.rs", None).is_err());
// Scoped to project 1: resolves
let pq = build_path_query(&conn, "utils.rs", Some(1)).unwrap();
assert_eq!(pq.value, "src/utils.rs");
}
#[test]
fn test_suffix_deduplicates_across_sources() {
// Same path in notes AND file_changes -> single match, not ambiguous
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_file_change(&conn, 1, 1, "src/auth/login.rs");
seed_diffnote(&conn, 1, 1, "src/auth/login.rs");
let pq = build_path_query(&conn, "login.rs", None).unwrap();
assert_eq!(pq.value, "src/auth/login.rs");
assert!(!pq.is_prefix);
}
#[test]
fn test_exact_match_preferred_over_suffix() {
let conn = setup_test_db();
seed_project(&conn, 1);
seed_mr(&conn, 1, 1);
seed_file_change(&conn, 1, 1, "README.md");
seed_file_change(&conn, 1, 1, "docs/README.md");
// "README.md" exists as exact match -> no ambiguity
let pq = build_path_query(&conn, "README.md", None).unwrap();
assert_eq!(pq.value, "README.md");
assert!(!pq.is_prefix);
}

View File

@@ -1,6 +1,7 @@
use rusqlite::Connection;
use super::error::{LoreError, Result};
use super::path_resolver::escape_like;
pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
let exact = conn.query_row(
@@ -106,13 +107,6 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
/// Escape LIKE metacharacters so `%` and `_` in user input are treated as
/// literals. All queries using this must include `ESCAPE '\'`.
fn escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
#[cfg(test)]
#[path = "project_tests.rs"]
mod tests;