From 1799842b59aff3575fcfa3b3ccd72952c7f6899f Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Fri, 13 Feb 2026 12:36:49 -0500 Subject: [PATCH] refactor(core): extract path_resolver module, deduplicate escape_like MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract path resolution logic into a new `src/core/path_resolver.rs` module to enable reuse across the codebase. Previously, `escape_like`, `normalize_repo_path`, `PathQuery`, `build_path_query`, `SuffixResult`, and `suffix_probe` were private to `who.rs`, making them inaccessible to other modules that needed the same path-matching logic. Key changes: - New `path_resolver.rs` with all path resolution functions made `pub` - New `path_resolver_tests.rs` with 15 tests covering: escape_like, normalize_repo_path, build_path_query heuristics, DB probes (exact, prefix, suffix), project scoping, and cross-source deduplication - Remove duplicate `escape_like` from `list.rs` (was `note_escape_like`) and `search/filters.rs` — both now import from `path_resolver` - `project.rs` imports `escape_like` from the new module The path resolution strategy (exact > prefix > suffix > heuristic fallback) and all DB probe logic are preserved exactly as extracted. `suffix_probe` checks both `new_path` and `old_path` columns to support querying renamed files. Co-Authored-By: Claude Opus 4.6 --- src/cli/commands/list.rs | 8 +- src/core/mod.rs | 1 + src/core/path_resolver.rs | 244 +++++++++++++++++++++++++++ src/core/path_resolver_tests.rs | 290 ++++++++++++++++++++++++++++++++ src/core/project.rs | 8 +- src/search/filters.rs | 7 +- 6 files changed, 538 insertions(+), 20 deletions(-) create mode 100644 src/core/path_resolver.rs create mode 100644 src/core/path_resolver_tests.rs diff --git a/src/cli/commands/list.rs b/src/cli/commands/list.rs index 2735ceb..50e6dfa 100644 --- a/src/cli/commands/list.rs +++ b/src/cli/commands/list.rs @@ -6,6 +6,7 @@ use crate::Config; use crate::cli::robot::{RobotMeta, expand_fields_preset, filter_fields}; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; +use crate::core::path_resolver::escape_like as note_escape_like; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::core::time::{ms_to_iso, now_ms, parse_since}; @@ -1257,13 +1258,6 @@ pub struct NoteListFilters { pub order: String, } -fn note_escape_like(input: &str) -> String { - input - .replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - pub fn query_notes( conn: &Connection, filters: &NoteListFilters, diff --git a/src/core/mod.rs b/src/core/mod.rs index 30a288a..bbfbc63 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -9,6 +9,7 @@ pub mod lock; pub mod logging; pub mod metrics; pub mod note_parser; +pub mod path_resolver; pub mod paths; pub mod payloads; pub mod project; diff --git a/src/core/path_resolver.rs b/src/core/path_resolver.rs new file mode 100644 index 0000000..30df73c --- /dev/null +++ b/src/core/path_resolver.rs @@ -0,0 +1,244 @@ +use rusqlite::Connection; + +use super::error::{LoreError, Result}; + +// ─── SQL Helpers ───────────────────────────────────────────────────────────── + +/// Escape LIKE metacharacters (`%`, `_`, `\`). +/// All queries using this must include `ESCAPE '\'`. +pub fn escape_like(input: &str) -> String { + input + .replace('\\', "\\\\") + .replace('%', "\\%") + .replace('_', "\\_") +} + +/// Normalize user-supplied repo paths to match stored DiffNote / file-change paths. +/// - trims whitespace +/// - strips leading "./" and "/" (repo-relative paths) +/// - converts '\' to '/' when no '/' present (Windows paste) +/// - collapses repeated "//" +pub fn normalize_repo_path(input: &str) -> String { + let mut s = input.trim().to_string(); + // Windows backslash normalization (only when no forward slashes present) + if s.contains('\\') && !s.contains('/') { + s = s.replace('\\', "/"); + } + // Strip leading ./ + while s.starts_with("./") { + s = s[2..].to_string(); + } + // Strip leading / + s = s.trim_start_matches('/').to_string(); + // Collapse repeated // + while s.contains("//") { + s = s.replace("//", "/"); + } + s +} + +// ─── Path Query Resolution ────────────────────────────────────────────────── + +/// Describes how to match a user-supplied path in SQL. +#[derive(Debug)] +pub struct PathQuery { + /// The parameter value to bind. + pub value: String, + /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. + pub is_prefix: bool, +} + +/// Result of a suffix probe against the DB. +pub enum SuffixResult { + /// Suffix probe was not attempted (conditions not met). + NotAttempted, + /// No paths matched the suffix. + NoMatch, + /// Exactly one distinct path matched — auto-resolve. + Unique(String), + /// Multiple distinct paths matched — user must disambiguate. + Ambiguous(Vec), +} + +/// Build a path query from a user-supplied path, with project-scoped DB probes. +/// +/// Resolution strategy (in priority order): +/// 1. Trailing `/` → directory prefix (LIKE `path/%`) +/// 2. Exact match probe against notes + `mr_file_changes` → exact (= `path`) +/// 3. Directory prefix probe → prefix (LIKE `path/%`) +/// 4. Suffix probe for bare filenames → auto-resolve or ambiguity error +/// 5. Heuristic fallback: `.` in last segment → file, else → directory prefix +pub fn build_path_query( + conn: &Connection, + path: &str, + project_id: Option, +) -> Result { + let trimmed = path.trim_end_matches('/'); + let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); + let is_root = !trimmed.contains('/'); + let forced_dir = path.ends_with('/'); + // Heuristic is now only a fallback; probes decide first when ambiguous. + let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); + + // Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped) + // Checks both new_path and old_path to support querying renamed files. + let exact_exists = conn + .query_row( + "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND (position_new_path = ?1 OR position_old_path = ?1) + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![trimmed, project_id], + |_| Ok(()), + ) + .is_ok() + || conn + .query_row( + "SELECT 1 FROM mr_file_changes + WHERE (new_path = ?1 OR old_path = ?1) + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![trimmed, project_id], + |_| Ok(()), + ) + .is_ok(); + + // Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped) + let prefix_exists = if !forced_dir && !exact_exists { + let escaped = escape_like(trimmed); + let pat = format!("{escaped}/%"); + conn.query_row( + "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\') + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![pat, project_id], + |_| Ok(()), + ) + .is_ok() + || conn + .query_row( + "SELECT 1 FROM mr_file_changes + WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\') + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![pat, project_id], + |_| Ok(()), + ) + .is_ok() + } else { + false + }; + + // Probe 3: suffix match — user typed a bare filename or partial path that + // doesn't exist as-is. Search for full paths ending with /input (or equal to input). + // This handles "login.rs" matching "src/auth/login.rs". + let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file { + suffix_probe(conn, trimmed, project_id)? + } else { + SuffixResult::NotAttempted + }; + + match suffix_resolved { + SuffixResult::Unique(full_path) => Ok(PathQuery { + value: full_path, + is_prefix: false, + }), + SuffixResult::Ambiguous(candidates) => { + let list = candidates + .iter() + .map(|p| format!(" {p}")) + .collect::>() + .join("\n"); + Err(LoreError::Ambiguous(format!( + "'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}" + ))) + } + SuffixResult::NotAttempted | SuffixResult::NoMatch => { + // Original logic: exact > prefix > heuristic + let is_file = if forced_dir { + false + } else if exact_exists { + true + } else if prefix_exists { + false + } else { + looks_like_file + }; + + if is_file { + Ok(PathQuery { + value: trimmed.to_string(), + is_prefix: false, + }) + } else { + let escaped = escape_like(trimmed); + Ok(PathQuery { + value: format!("{escaped}/%"), + is_prefix: true, + }) + } + } + } +} + +/// Probe both notes and mr_file_changes for paths ending with the given suffix. +/// Searches both new_path and old_path columns to support renamed file resolution. +/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list). +pub fn suffix_probe( + conn: &Connection, + suffix: &str, + project_id: Option, +) -> Result { + let escaped = escape_like(suffix); + let suffix_pat = format!("%/{escaped}"); + + let mut stmt = conn.prepare_cached( + "SELECT DISTINCT full_path FROM ( + SELECT position_new_path AS full_path + FROM notes INDEXED BY idx_notes_diffnote_path_created + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + UNION + SELECT new_path AS full_path FROM mr_file_changes + WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + UNION + SELECT position_old_path AS full_path FROM notes + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND position_old_path IS NOT NULL + AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + UNION + SELECT old_path AS full_path FROM mr_file_changes + WHERE old_path IS NOT NULL + AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + ) + ORDER BY full_path + LIMIT 11", + )?; + + let candidates: Vec = stmt + .query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| { + row.get(0) + })? + .collect::, _>>()?; + + match candidates.len() { + 0 => Ok(SuffixResult::NoMatch), + 1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())), + _ => Ok(SuffixResult::Ambiguous(candidates)), + } +} + +#[cfg(test)] +#[path = "path_resolver_tests.rs"] +mod tests; diff --git a/src/core/path_resolver_tests.rs b/src/core/path_resolver_tests.rs new file mode 100644 index 0000000..ec6fe01 --- /dev/null +++ b/src/core/path_resolver_tests.rs @@ -0,0 +1,290 @@ +use super::*; +use crate::core::db::{create_connection, run_migrations}; +use std::path::Path; + +fn setup_test_db() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + conn +} + +fn seed_project(conn: &Connection, id: i64) { + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (?1, ?1, 'group/repo', 'https://gl.example.com/group/repo', 1000, 2000)", + rusqlite::params![id], + ) + .unwrap(); +} + +fn seed_mr(conn: &Connection, mr_id: i64, project_id: i64) { + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \ + created_at, updated_at, last_seen_at, source_branch, target_branch) + VALUES (?1, ?1, ?1, ?2, 'MR', 'merged', 1000, 2000, 2000, 'feat', 'main')", + rusqlite::params![mr_id, project_id], + ) + .unwrap(); +} + +fn seed_file_change(conn: &Connection, mr_id: i64, project_id: i64, path: &str) { + conn.execute( + "INSERT INTO mr_file_changes (merge_request_id, project_id, new_path, change_type) + VALUES (?1, ?2, ?3, 'modified')", + rusqlite::params![mr_id, project_id, path], + ) + .unwrap(); +} + +fn seed_diffnote(conn: &Connection, id: i64, project_id: i64, path: &str) { + // Need a discussion first (MergeRequest type, linked to mr_id=1) + conn.execute( + "INSERT OR IGNORE INTO discussions (id, gitlab_discussion_id, project_id, \ + merge_request_id, noteable_type, resolvable, resolved, last_seen_at, last_note_at) + VALUES (?1, ?2, ?3, 1, 'MergeRequest', 1, 0, 2000, 2000)", + rusqlite::params![id, format!("disc-{id}"), project_id], + ) + .unwrap(); + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, note_type, is_system, \ + author_username, body, created_at, updated_at, last_seen_at, position_new_path) + VALUES (?1, ?1, ?1, ?2, 'DiffNote', 0, 'user', 'note', 1000, 2000, 2000, ?3)", + rusqlite::params![id, project_id, path], + ) + .unwrap(); +} + +// ─── escape_like ───────────────────────────────────────────────────────────── + +#[test] +fn test_escape_like() { + assert_eq!(escape_like("normal/path"), "normal/path"); + assert_eq!(escape_like("has_underscore"), "has\\_underscore"); + assert_eq!(escape_like("has%percent"), "has\\%percent"); + assert_eq!(escape_like("has\\backslash"), "has\\\\backslash"); +} + +// ─── normalize_repo_path ───────────────────────────────────────────────────── + +#[test] +fn test_normalize_repo_path() { + assert_eq!(normalize_repo_path("./src/foo/"), "src/foo/"); + assert_eq!(normalize_repo_path("/src/foo/"), "src/foo/"); + assert_eq!(normalize_repo_path("././src/foo"), "src/foo"); + assert_eq!(normalize_repo_path("src\\foo\\bar.rs"), "src/foo/bar.rs"); + assert_eq!(normalize_repo_path("src/foo\\bar"), "src/foo\\bar"); + assert_eq!(normalize_repo_path("src//foo//bar/"), "src/foo/bar/"); + assert_eq!(normalize_repo_path(" src/foo/ "), "src/foo/"); + assert_eq!(normalize_repo_path("src/foo/bar.rs"), "src/foo/bar.rs"); + assert_eq!(normalize_repo_path(""), ""); +} + +// ─── build_path_query heuristics (no DB data) ────────────────────────────── + +#[test] +fn test_trailing_slash_is_prefix() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/auth/", None).unwrap(); + assert_eq!(pq.value, "src/auth/%"); + assert!(pq.is_prefix); +} + +#[test] +fn test_no_dot_in_last_segment_is_prefix() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/auth", None).unwrap(); + assert_eq!(pq.value, "src/auth/%"); + assert!(pq.is_prefix); +} + +#[test] +fn test_file_extension_is_exact() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/auth/login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_root_file_is_exact() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "README.md", None).unwrap(); + assert_eq!(pq.value, "README.md"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_dotless_root_file_is_exact() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "Makefile", None).unwrap(); + assert_eq!(pq.value, "Makefile"); + assert!(!pq.is_prefix); + + let pq = build_path_query(&conn, "LICENSE", None).unwrap(); + assert_eq!(pq.value, "LICENSE"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_metacharacters_escaped_in_prefix() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/test_files/", None).unwrap(); + assert_eq!(pq.value, "src/test\\_files/%"); + assert!(pq.is_prefix); +} + +#[test] +fn test_exact_value_not_escaped() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "README_with_underscore.md", None).unwrap(); + assert_eq!(pq.value, "README_with_underscore.md"); + assert!(!pq.is_prefix); +} + +// ─── build_path_query DB probes ───────────────────────────────────────────── + +#[test] +fn test_db_probe_detects_dotless_file() { + // "src/Dockerfile" has no dot in last segment -> normally prefix. + // DB probe detects it's actually a file. + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_diffnote(&conn, 1, 1, "src/Dockerfile"); + + let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); + assert_eq!(pq.value, "src/Dockerfile"); + assert!(!pq.is_prefix); + + // Without DB data -> falls through to prefix + let empty = setup_test_db(); + let pq2 = build_path_query(&empty, "src/Dockerfile", None).unwrap(); + assert!(pq2.is_prefix); +} + +#[test] +fn test_db_probe_via_file_changes() { + // Exact match via mr_file_changes even without notes + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/Dockerfile"); + + let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); + assert_eq!(pq.value, "src/Dockerfile"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_db_probe_project_scoped() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_project(&conn, 2); + seed_mr(&conn, 1, 1); + seed_diffnote(&conn, 1, 1, "infra/Makefile"); + + // Unscoped: finds it + assert!( + !build_path_query(&conn, "infra/Makefile", None) + .unwrap() + .is_prefix + ); + // Scoped to project 1: finds it + assert!( + !build_path_query(&conn, "infra/Makefile", Some(1)) + .unwrap() + .is_prefix + ); + // Scoped to project 2: no data -> prefix + assert!( + build_path_query(&conn, "infra/Makefile", Some(2)) + .unwrap() + .is_prefix + ); +} + +// ─── suffix resolution ────────────────────────────────────────────────────── + +#[test] +fn test_suffix_resolves_bare_filename() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/login.rs"); + + let pq = build_path_query(&conn, "login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_suffix_resolves_partial_path() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/login.rs"); + + let pq = build_path_query(&conn, "auth/login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_suffix_ambiguous_returns_error() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/utils.rs"); + seed_file_change(&conn, 1, 1, "src/db/utils.rs"); + + let err = build_path_query(&conn, "utils.rs", None).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("src/auth/utils.rs"), "candidates: {msg}"); + assert!(msg.contains("src/db/utils.rs"), "candidates: {msg}"); +} + +#[test] +fn test_suffix_scoped_to_project() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_project(&conn, 2); + seed_mr(&conn, 1, 1); + seed_mr(&conn, 2, 2); + seed_file_change(&conn, 1, 1, "src/utils.rs"); + seed_file_change(&conn, 2, 2, "lib/utils.rs"); + + // Unscoped: ambiguous + assert!(build_path_query(&conn, "utils.rs", None).is_err()); + + // Scoped to project 1: resolves + let pq = build_path_query(&conn, "utils.rs", Some(1)).unwrap(); + assert_eq!(pq.value, "src/utils.rs"); +} + +#[test] +fn test_suffix_deduplicates_across_sources() { + // Same path in notes AND file_changes -> single match, not ambiguous + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/login.rs"); + seed_diffnote(&conn, 1, 1, "src/auth/login.rs"); + + let pq = build_path_query(&conn, "login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_exact_match_preferred_over_suffix() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "README.md"); + seed_file_change(&conn, 1, 1, "docs/README.md"); + + // "README.md" exists as exact match -> no ambiguity + let pq = build_path_query(&conn, "README.md", None).unwrap(); + assert_eq!(pq.value, "README.md"); + assert!(!pq.is_prefix); +} diff --git a/src/core/project.rs b/src/core/project.rs index 0f362aa..f3b968b 100644 --- a/src/core/project.rs +++ b/src/core/project.rs @@ -1,6 +1,7 @@ use rusqlite::Connection; use super::error::{LoreError, Result}; +use super::path_resolver::escape_like; pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { let exact = conn.query_row( @@ -106,13 +107,6 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { /// Escape LIKE metacharacters so `%` and `_` in user input are treated as /// literals. All queries using this must include `ESCAPE '\'`. -fn escape_like(input: &str) -> String { - input - .replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - #[cfg(test)] #[path = "project_tests.rs"] mod tests; diff --git a/src/search/filters.rs b/src/search/filters.rs index 83acb7f..018f059 100644 --- a/src/search/filters.rs +++ b/src/search/filters.rs @@ -1,4 +1,5 @@ use crate::core::error::Result; +use crate::core::path_resolver::escape_like; use crate::documents::SourceType; use rusqlite::Connection; @@ -43,12 +44,6 @@ impl SearchFilters { } } -fn escape_like(s: &str) -> String { - s.replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - pub fn apply_filters( conn: &Connection, document_ids: &[i64],