diff --git a/src/cli/commands/list.rs b/src/cli/commands/list.rs index 2735ceb..50e6dfa 100644 --- a/src/cli/commands/list.rs +++ b/src/cli/commands/list.rs @@ -6,6 +6,7 @@ use crate::Config; use crate::cli::robot::{RobotMeta, expand_fields_preset, filter_fields}; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; +use crate::core::path_resolver::escape_like as note_escape_like; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::core::time::{ms_to_iso, now_ms, parse_since}; @@ -1257,13 +1258,6 @@ pub struct NoteListFilters { pub order: String, } -fn note_escape_like(input: &str) -> String { - input - .replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - pub fn query_notes( conn: &Connection, filters: &NoteListFilters, diff --git a/src/cli/commands/who.rs b/src/cli/commands/who.rs index 29bbb48..331c5c0 100644 --- a/src/cli/commands/who.rs +++ b/src/cli/commands/who.rs @@ -9,6 +9,9 @@ use crate::cli::robot::RobotMeta; use crate::core::config::ScoringConfig; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; +use crate::core::path_resolver::{PathQuery, build_path_query, normalize_repo_path}; +#[cfg(test)] +use crate::core::path_resolver::{SuffixResult, escape_like, suffix_probe}; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::core::time::{ms_to_iso, now_ms, parse_since, parse_since_from}; @@ -75,30 +78,6 @@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result> { )) } -/// Normalize user-supplied repo paths to match stored DiffNote paths. -/// - trims whitespace -/// - strips leading "./" and "/" (repo-relative paths) -/// - converts '\' to '/' when no '/' present (Windows paste) -/// - collapses repeated "//" -fn normalize_repo_path(input: &str) -> String { - let mut s = input.trim().to_string(); - // Windows backslash normalization (only when no forward slashes present) - if s.contains('\\') && !s.contains('/') { - s = s.replace('\\', "/"); - } - // Strip leading ./ - while s.starts_with("./") { - s = s[2..].to_string(); - } - // Strip leading / - s = s.trim_start_matches('/').to_string(); - // Collapse repeated // - while s.contains("//") { - s = s.replace("//", "/"); - } - s -} - // ─── Result Types ──────────────────────────────────────────────────────────── /// Top-level run result: carries resolved inputs + the mode-specific result. @@ -485,210 +464,6 @@ fn resolve_since_required(input: &str) -> Result { // ─── Path Query Construction ───────────────────────────────────────────────── -/// Describes how to match a user-supplied path in SQL. -#[derive(Debug)] -struct PathQuery { - /// The parameter value to bind. - value: String, - /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. - is_prefix: bool, -} - -/// Build a path query from a user-supplied path, with project-scoped DB probes. -/// -/// Rules: -/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE) -/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=) -/// - Else if the last path segment contains `.`, heuristic suggests file (=) -/// - Two-way DB probe (project-scoped): when heuristics are ambiguous, -/// probe the DB to resolve. -/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE) -fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> Result { - let trimmed = path.trim_end_matches('/'); - let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); - let is_root = !trimmed.contains('/'); - let forced_dir = path.ends_with('/'); - // Heuristic is now only a fallback; probes decide first when ambiguous. - let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); - - // Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped) - // Checks both new_path and old_path to support querying renamed files. - // Exact-match probes already use the partial index, but LIKE probes below - // benefit from the INDEXED BY hint (same planner issue as expert query). - let exact_exists = conn - .query_row( - "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND (position_new_path = ?1 OR position_old_path = ?1) - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![trimmed, project_id], - |_| Ok(()), - ) - .is_ok() - || conn - .query_row( - "SELECT 1 FROM mr_file_changes - WHERE (new_path = ?1 OR old_path = ?1) - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![trimmed, project_id], - |_| Ok(()), - ) - .is_ok(); - - // Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped) - // Checks both new_path and old_path to support querying renamed directories. - let prefix_exists = if !forced_dir && !exact_exists { - let escaped = escape_like(trimmed); - let pat = format!("{escaped}/%"); - conn.query_row( - "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\') - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![pat, project_id], - |_| Ok(()), - ) - .is_ok() - || conn - .query_row( - "SELECT 1 FROM mr_file_changes - WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\') - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![pat, project_id], - |_| Ok(()), - ) - .is_ok() - } else { - false - }; - - // Probe 3: suffix match — user typed a bare filename or partial path that - // doesn't exist as-is. Search for full paths ending with /input (or equal to input). - // This handles "login.rs" matching "src/auth/login.rs". - let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file { - suffix_probe(conn, trimmed, project_id)? - } else { - SuffixResult::NotAttempted - }; - - match suffix_resolved { - SuffixResult::Unique(full_path) => Ok(PathQuery { - value: full_path, - is_prefix: false, - }), - SuffixResult::Ambiguous(candidates) => { - let list = candidates - .iter() - .map(|p| format!(" {p}")) - .collect::>() - .join("\n"); - Err(LoreError::Ambiguous(format!( - "'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}" - ))) - } - SuffixResult::NotAttempted | SuffixResult::NoMatch => { - // Original logic: exact > prefix > heuristic - let is_file = if forced_dir { - false - } else if exact_exists { - true - } else if prefix_exists { - false - } else { - looks_like_file - }; - - if is_file { - Ok(PathQuery { - value: trimmed.to_string(), - is_prefix: false, - }) - } else { - let escaped = escape_like(trimmed); - Ok(PathQuery { - value: format!("{escaped}/%"), - is_prefix: true, - }) - } - } - } -} - -/// Result of a suffix probe against the DB. -enum SuffixResult { - /// Suffix probe was not attempted (conditions not met). - NotAttempted, - /// No paths matched the suffix. - NoMatch, - /// Exactly one distinct path matched — auto-resolve. - Unique(String), - /// Multiple distinct paths matched — user must disambiguate. - Ambiguous(Vec), -} - -/// Probe both notes and mr_file_changes for paths ending with the given suffix. -/// Searches both new_path and old_path columns to support renamed file resolution. -/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list). -fn suffix_probe(conn: &Connection, suffix: &str, project_id: Option) -> Result { - let escaped = escape_like(suffix); - let suffix_pat = format!("%/{escaped}"); - - let mut stmt = conn.prepare_cached( - "SELECT DISTINCT full_path FROM ( - SELECT position_new_path AS full_path - FROM notes INDEXED BY idx_notes_diffnote_path_created - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - UNION - SELECT new_path AS full_path FROM mr_file_changes - WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - UNION - SELECT position_old_path AS full_path FROM notes - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND position_old_path IS NOT NULL - AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - UNION - SELECT old_path AS full_path FROM mr_file_changes - WHERE old_path IS NOT NULL - AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - ) - ORDER BY full_path - LIMIT 11", - )?; - - let candidates: Vec = stmt - .query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| { - row.get(0) - })? - .collect::, _>>()?; - - match candidates.len() { - 0 => Ok(SuffixResult::NoMatch), - 1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())), - _ => Ok(SuffixResult::Ambiguous(candidates)), - } -} - -/// Escape LIKE metacharacters. All queries using this must include `ESCAPE '\'`. -fn escape_like(input: &str) -> String { - input - .replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - // ─── Scoring Helpers ───────────────────────────────────────────────────────── /// Exponential half-life decay: `2^(-days / half_life)`. @@ -1203,11 +978,10 @@ fn query_expert_details( .collect(); let in_clause = placeholders.join(","); - let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created"; let sql = format!( " WITH signals AS ( - -- 1. DiffNote reviewer + -- 1. DiffNote reviewer (matches both new_path and old_path for renamed files) SELECT n.author_username AS username, 'reviewer' AS role, @@ -1216,7 +990,7 @@ fn query_expert_details( m.title AS title, COUNT(*) AS note_count, MAX(n.created_at) AS last_activity - FROM notes n {notes_indexed_by} + FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id JOIN projects p ON m.project_id = p.id @@ -1224,8 +998,9 @@ fn query_expert_details( AND n.is_system = 0 AND n.author_username IS NOT NULL AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') - AND n.position_new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) AND n.author_username IN ({in_clause}) @@ -1233,7 +1008,7 @@ fn query_expert_details( UNION ALL - -- 2. DiffNote MR author + -- 2. DiffNote MR author (matches both new_path and old_path for renamed files) SELECT m.author_username AS username, 'author' AS role, @@ -1244,13 +1019,14 @@ fn query_expert_details( MAX(n.created_at) AS last_activity FROM merge_requests m JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n {notes_indexed_by} ON n.discussion_id = d.id + JOIN notes n ON n.discussion_id = d.id JOIN projects p ON m.project_id = p.id WHERE n.note_type = 'DiffNote' AND n.is_system = 0 AND m.author_username IS NOT NULL - AND m.state IN ('opened','merged') - AND n.position_new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) AND m.author_username IN ({in_clause}) @@ -1258,7 +1034,7 @@ fn query_expert_details( UNION ALL - -- 3. MR author via file changes + -- 3. MR author via file changes (matches both new_path and old_path) SELECT m.author_username AS username, 'author' AS role, @@ -1271,15 +1047,16 @@ fn query_expert_details( JOIN merge_requests m ON fc.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE m.author_username IS NOT NULL - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) AND m.author_username IN ({in_clause}) UNION ALL - -- 4. MR reviewer via file changes + mr_reviewers + -- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path) SELECT r.username AS username, 'reviewer' AS role, @@ -1294,8 +1071,9 @@ fn query_expert_details( JOIN mr_reviewers r ON r.merge_request_id = m.id WHERE r.username IS NOT NULL AND (m.author_username IS NULL OR r.username != m.author_username) - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) AND r.username IN ({in_clause}) @@ -1874,50 +1652,51 @@ fn query_overlap( } else { "= ?1" }; - // Force the partial index on DiffNote queries (same rationale as expert mode). - // Without this hint SQLite picks idx_notes_system (38% of rows) instead of - // idx_notes_diffnote_path_created (9.3% of rows): measured 50-133x slower. - let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created"; + // Match both new_path and old_path to capture activity on renamed files. + // INDEXED BY removed to allow OR across path columns; overlap runs once + // per command so the minor plan difference is acceptable. let sql = format!( "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( - -- 1. DiffNote reviewer + -- 1. DiffNote reviewer (matches both new_path and old_path) SELECT n.author_username AS username, 'reviewer' AS role, COUNT(DISTINCT m.id) AS touch_count, MAX(n.created_at) AS last_seen_at, GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM notes n {notes_indexed_by} + FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE n.note_type = 'DiffNote' - AND n.position_new_path {path_op} + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.is_system = 0 AND n.author_username IS NOT NULL AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') + AND m.state IN ('opened','merged','closed') AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) GROUP BY n.author_username UNION ALL - -- 2. DiffNote MR author + -- 2. DiffNote MR author (matches both new_path and old_path) SELECT m.author_username AS username, 'author' AS role, COUNT(DISTINCT m.id) AS touch_count, MAX(n.created_at) AS last_seen_at, GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM notes n {notes_indexed_by} + FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE n.note_type = 'DiffNote' - AND n.position_new_path {path_op} + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.is_system = 0 - AND m.state IN ('opened', 'merged') + AND m.state IN ('opened','merged','closed') AND m.author_username IS NOT NULL AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) @@ -1925,7 +1704,7 @@ fn query_overlap( UNION ALL - -- 3. MR author via file changes + -- 3. MR author via file changes (matches both new_path and old_path) SELECT m.author_username AS username, 'author' AS role, @@ -1936,15 +1715,16 @@ fn query_overlap( JOIN merge_requests m ON fc.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE m.author_username IS NOT NULL - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) GROUP BY m.author_username UNION ALL - -- 4. MR reviewer via file changes + mr_reviewers + -- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path) SELECT r.username AS username, 'reviewer' AS role, @@ -1957,8 +1737,9 @@ fn query_overlap( JOIN mr_reviewers r ON r.merge_request_id = m.id WHERE r.username IS NOT NULL AND (m.author_username IS NULL OR r.username != m.author_username) - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) GROUP BY r.username diff --git a/src/core/mod.rs b/src/core/mod.rs index 30a288a..bbfbc63 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -9,6 +9,7 @@ pub mod lock; pub mod logging; pub mod metrics; pub mod note_parser; +pub mod path_resolver; pub mod paths; pub mod payloads; pub mod project; diff --git a/src/core/path_resolver.rs b/src/core/path_resolver.rs new file mode 100644 index 0000000..30df73c --- /dev/null +++ b/src/core/path_resolver.rs @@ -0,0 +1,244 @@ +use rusqlite::Connection; + +use super::error::{LoreError, Result}; + +// ─── SQL Helpers ───────────────────────────────────────────────────────────── + +/// Escape LIKE metacharacters (`%`, `_`, `\`). +/// All queries using this must include `ESCAPE '\'`. +pub fn escape_like(input: &str) -> String { + input + .replace('\\', "\\\\") + .replace('%', "\\%") + .replace('_', "\\_") +} + +/// Normalize user-supplied repo paths to match stored DiffNote / file-change paths. +/// - trims whitespace +/// - strips leading "./" and "/" (repo-relative paths) +/// - converts '\' to '/' when no '/' present (Windows paste) +/// - collapses repeated "//" +pub fn normalize_repo_path(input: &str) -> String { + let mut s = input.trim().to_string(); + // Windows backslash normalization (only when no forward slashes present) + if s.contains('\\') && !s.contains('/') { + s = s.replace('\\', "/"); + } + // Strip leading ./ + while s.starts_with("./") { + s = s[2..].to_string(); + } + // Strip leading / + s = s.trim_start_matches('/').to_string(); + // Collapse repeated // + while s.contains("//") { + s = s.replace("//", "/"); + } + s +} + +// ─── Path Query Resolution ────────────────────────────────────────────────── + +/// Describes how to match a user-supplied path in SQL. +#[derive(Debug)] +pub struct PathQuery { + /// The parameter value to bind. + pub value: String, + /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. + pub is_prefix: bool, +} + +/// Result of a suffix probe against the DB. +pub enum SuffixResult { + /// Suffix probe was not attempted (conditions not met). + NotAttempted, + /// No paths matched the suffix. + NoMatch, + /// Exactly one distinct path matched — auto-resolve. + Unique(String), + /// Multiple distinct paths matched — user must disambiguate. + Ambiguous(Vec), +} + +/// Build a path query from a user-supplied path, with project-scoped DB probes. +/// +/// Resolution strategy (in priority order): +/// 1. Trailing `/` → directory prefix (LIKE `path/%`) +/// 2. Exact match probe against notes + `mr_file_changes` → exact (= `path`) +/// 3. Directory prefix probe → prefix (LIKE `path/%`) +/// 4. Suffix probe for bare filenames → auto-resolve or ambiguity error +/// 5. Heuristic fallback: `.` in last segment → file, else → directory prefix +pub fn build_path_query( + conn: &Connection, + path: &str, + project_id: Option, +) -> Result { + let trimmed = path.trim_end_matches('/'); + let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); + let is_root = !trimmed.contains('/'); + let forced_dir = path.ends_with('/'); + // Heuristic is now only a fallback; probes decide first when ambiguous. + let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); + + // Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped) + // Checks both new_path and old_path to support querying renamed files. + let exact_exists = conn + .query_row( + "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND (position_new_path = ?1 OR position_old_path = ?1) + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![trimmed, project_id], + |_| Ok(()), + ) + .is_ok() + || conn + .query_row( + "SELECT 1 FROM mr_file_changes + WHERE (new_path = ?1 OR old_path = ?1) + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![trimmed, project_id], + |_| Ok(()), + ) + .is_ok(); + + // Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped) + let prefix_exists = if !forced_dir && !exact_exists { + let escaped = escape_like(trimmed); + let pat = format!("{escaped}/%"); + conn.query_row( + "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\') + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![pat, project_id], + |_| Ok(()), + ) + .is_ok() + || conn + .query_row( + "SELECT 1 FROM mr_file_changes + WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\') + AND (?2 IS NULL OR project_id = ?2) + LIMIT 1", + rusqlite::params![pat, project_id], + |_| Ok(()), + ) + .is_ok() + } else { + false + }; + + // Probe 3: suffix match — user typed a bare filename or partial path that + // doesn't exist as-is. Search for full paths ending with /input (or equal to input). + // This handles "login.rs" matching "src/auth/login.rs". + let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file { + suffix_probe(conn, trimmed, project_id)? + } else { + SuffixResult::NotAttempted + }; + + match suffix_resolved { + SuffixResult::Unique(full_path) => Ok(PathQuery { + value: full_path, + is_prefix: false, + }), + SuffixResult::Ambiguous(candidates) => { + let list = candidates + .iter() + .map(|p| format!(" {p}")) + .collect::>() + .join("\n"); + Err(LoreError::Ambiguous(format!( + "'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}" + ))) + } + SuffixResult::NotAttempted | SuffixResult::NoMatch => { + // Original logic: exact > prefix > heuristic + let is_file = if forced_dir { + false + } else if exact_exists { + true + } else if prefix_exists { + false + } else { + looks_like_file + }; + + if is_file { + Ok(PathQuery { + value: trimmed.to_string(), + is_prefix: false, + }) + } else { + let escaped = escape_like(trimmed); + Ok(PathQuery { + value: format!("{escaped}/%"), + is_prefix: true, + }) + } + } + } +} + +/// Probe both notes and mr_file_changes for paths ending with the given suffix. +/// Searches both new_path and old_path columns to support renamed file resolution. +/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list). +pub fn suffix_probe( + conn: &Connection, + suffix: &str, + project_id: Option, +) -> Result { + let escaped = escape_like(suffix); + let suffix_pat = format!("%/{escaped}"); + + let mut stmt = conn.prepare_cached( + "SELECT DISTINCT full_path FROM ( + SELECT position_new_path AS full_path + FROM notes INDEXED BY idx_notes_diffnote_path_created + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + UNION + SELECT new_path AS full_path FROM mr_file_changes + WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + UNION + SELECT position_old_path AS full_path FROM notes + WHERE note_type = 'DiffNote' + AND is_system = 0 + AND position_old_path IS NOT NULL + AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + UNION + SELECT old_path AS full_path FROM mr_file_changes + WHERE old_path IS NOT NULL + AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2) + AND (?3 IS NULL OR project_id = ?3) + ) + ORDER BY full_path + LIMIT 11", + )?; + + let candidates: Vec = stmt + .query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| { + row.get(0) + })? + .collect::, _>>()?; + + match candidates.len() { + 0 => Ok(SuffixResult::NoMatch), + 1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())), + _ => Ok(SuffixResult::Ambiguous(candidates)), + } +} + +#[cfg(test)] +#[path = "path_resolver_tests.rs"] +mod tests; diff --git a/src/core/path_resolver_tests.rs b/src/core/path_resolver_tests.rs new file mode 100644 index 0000000..ec6fe01 --- /dev/null +++ b/src/core/path_resolver_tests.rs @@ -0,0 +1,290 @@ +use super::*; +use crate::core::db::{create_connection, run_migrations}; +use std::path::Path; + +fn setup_test_db() -> Connection { + let conn = create_connection(Path::new(":memory:")).unwrap(); + run_migrations(&conn).unwrap(); + conn +} + +fn seed_project(conn: &Connection, id: i64) { + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (?1, ?1, 'group/repo', 'https://gl.example.com/group/repo', 1000, 2000)", + rusqlite::params![id], + ) + .unwrap(); +} + +fn seed_mr(conn: &Connection, mr_id: i64, project_id: i64) { + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \ + created_at, updated_at, last_seen_at, source_branch, target_branch) + VALUES (?1, ?1, ?1, ?2, 'MR', 'merged', 1000, 2000, 2000, 'feat', 'main')", + rusqlite::params![mr_id, project_id], + ) + .unwrap(); +} + +fn seed_file_change(conn: &Connection, mr_id: i64, project_id: i64, path: &str) { + conn.execute( + "INSERT INTO mr_file_changes (merge_request_id, project_id, new_path, change_type) + VALUES (?1, ?2, ?3, 'modified')", + rusqlite::params![mr_id, project_id, path], + ) + .unwrap(); +} + +fn seed_diffnote(conn: &Connection, id: i64, project_id: i64, path: &str) { + // Need a discussion first (MergeRequest type, linked to mr_id=1) + conn.execute( + "INSERT OR IGNORE INTO discussions (id, gitlab_discussion_id, project_id, \ + merge_request_id, noteable_type, resolvable, resolved, last_seen_at, last_note_at) + VALUES (?1, ?2, ?3, 1, 'MergeRequest', 1, 0, 2000, 2000)", + rusqlite::params![id, format!("disc-{id}"), project_id], + ) + .unwrap(); + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, note_type, is_system, \ + author_username, body, created_at, updated_at, last_seen_at, position_new_path) + VALUES (?1, ?1, ?1, ?2, 'DiffNote', 0, 'user', 'note', 1000, 2000, 2000, ?3)", + rusqlite::params![id, project_id, path], + ) + .unwrap(); +} + +// ─── escape_like ───────────────────────────────────────────────────────────── + +#[test] +fn test_escape_like() { + assert_eq!(escape_like("normal/path"), "normal/path"); + assert_eq!(escape_like("has_underscore"), "has\\_underscore"); + assert_eq!(escape_like("has%percent"), "has\\%percent"); + assert_eq!(escape_like("has\\backslash"), "has\\\\backslash"); +} + +// ─── normalize_repo_path ───────────────────────────────────────────────────── + +#[test] +fn test_normalize_repo_path() { + assert_eq!(normalize_repo_path("./src/foo/"), "src/foo/"); + assert_eq!(normalize_repo_path("/src/foo/"), "src/foo/"); + assert_eq!(normalize_repo_path("././src/foo"), "src/foo"); + assert_eq!(normalize_repo_path("src\\foo\\bar.rs"), "src/foo/bar.rs"); + assert_eq!(normalize_repo_path("src/foo\\bar"), "src/foo\\bar"); + assert_eq!(normalize_repo_path("src//foo//bar/"), "src/foo/bar/"); + assert_eq!(normalize_repo_path(" src/foo/ "), "src/foo/"); + assert_eq!(normalize_repo_path("src/foo/bar.rs"), "src/foo/bar.rs"); + assert_eq!(normalize_repo_path(""), ""); +} + +// ─── build_path_query heuristics (no DB data) ────────────────────────────── + +#[test] +fn test_trailing_slash_is_prefix() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/auth/", None).unwrap(); + assert_eq!(pq.value, "src/auth/%"); + assert!(pq.is_prefix); +} + +#[test] +fn test_no_dot_in_last_segment_is_prefix() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/auth", None).unwrap(); + assert_eq!(pq.value, "src/auth/%"); + assert!(pq.is_prefix); +} + +#[test] +fn test_file_extension_is_exact() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/auth/login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_root_file_is_exact() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "README.md", None).unwrap(); + assert_eq!(pq.value, "README.md"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_dotless_root_file_is_exact() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "Makefile", None).unwrap(); + assert_eq!(pq.value, "Makefile"); + assert!(!pq.is_prefix); + + let pq = build_path_query(&conn, "LICENSE", None).unwrap(); + assert_eq!(pq.value, "LICENSE"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_metacharacters_escaped_in_prefix() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "src/test_files/", None).unwrap(); + assert_eq!(pq.value, "src/test\\_files/%"); + assert!(pq.is_prefix); +} + +#[test] +fn test_exact_value_not_escaped() { + let conn = setup_test_db(); + let pq = build_path_query(&conn, "README_with_underscore.md", None).unwrap(); + assert_eq!(pq.value, "README_with_underscore.md"); + assert!(!pq.is_prefix); +} + +// ─── build_path_query DB probes ───────────────────────────────────────────── + +#[test] +fn test_db_probe_detects_dotless_file() { + // "src/Dockerfile" has no dot in last segment -> normally prefix. + // DB probe detects it's actually a file. + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_diffnote(&conn, 1, 1, "src/Dockerfile"); + + let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); + assert_eq!(pq.value, "src/Dockerfile"); + assert!(!pq.is_prefix); + + // Without DB data -> falls through to prefix + let empty = setup_test_db(); + let pq2 = build_path_query(&empty, "src/Dockerfile", None).unwrap(); + assert!(pq2.is_prefix); +} + +#[test] +fn test_db_probe_via_file_changes() { + // Exact match via mr_file_changes even without notes + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/Dockerfile"); + + let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap(); + assert_eq!(pq.value, "src/Dockerfile"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_db_probe_project_scoped() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_project(&conn, 2); + seed_mr(&conn, 1, 1); + seed_diffnote(&conn, 1, 1, "infra/Makefile"); + + // Unscoped: finds it + assert!( + !build_path_query(&conn, "infra/Makefile", None) + .unwrap() + .is_prefix + ); + // Scoped to project 1: finds it + assert!( + !build_path_query(&conn, "infra/Makefile", Some(1)) + .unwrap() + .is_prefix + ); + // Scoped to project 2: no data -> prefix + assert!( + build_path_query(&conn, "infra/Makefile", Some(2)) + .unwrap() + .is_prefix + ); +} + +// ─── suffix resolution ────────────────────────────────────────────────────── + +#[test] +fn test_suffix_resolves_bare_filename() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/login.rs"); + + let pq = build_path_query(&conn, "login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_suffix_resolves_partial_path() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/login.rs"); + + let pq = build_path_query(&conn, "auth/login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_suffix_ambiguous_returns_error() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/utils.rs"); + seed_file_change(&conn, 1, 1, "src/db/utils.rs"); + + let err = build_path_query(&conn, "utils.rs", None).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("src/auth/utils.rs"), "candidates: {msg}"); + assert!(msg.contains("src/db/utils.rs"), "candidates: {msg}"); +} + +#[test] +fn test_suffix_scoped_to_project() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_project(&conn, 2); + seed_mr(&conn, 1, 1); + seed_mr(&conn, 2, 2); + seed_file_change(&conn, 1, 1, "src/utils.rs"); + seed_file_change(&conn, 2, 2, "lib/utils.rs"); + + // Unscoped: ambiguous + assert!(build_path_query(&conn, "utils.rs", None).is_err()); + + // Scoped to project 1: resolves + let pq = build_path_query(&conn, "utils.rs", Some(1)).unwrap(); + assert_eq!(pq.value, "src/utils.rs"); +} + +#[test] +fn test_suffix_deduplicates_across_sources() { + // Same path in notes AND file_changes -> single match, not ambiguous + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "src/auth/login.rs"); + seed_diffnote(&conn, 1, 1, "src/auth/login.rs"); + + let pq = build_path_query(&conn, "login.rs", None).unwrap(); + assert_eq!(pq.value, "src/auth/login.rs"); + assert!(!pq.is_prefix); +} + +#[test] +fn test_exact_match_preferred_over_suffix() { + let conn = setup_test_db(); + seed_project(&conn, 1); + seed_mr(&conn, 1, 1); + seed_file_change(&conn, 1, 1, "README.md"); + seed_file_change(&conn, 1, 1, "docs/README.md"); + + // "README.md" exists as exact match -> no ambiguity + let pq = build_path_query(&conn, "README.md", None).unwrap(); + assert_eq!(pq.value, "README.md"); + assert!(!pq.is_prefix); +} diff --git a/src/core/project.rs b/src/core/project.rs index 0f362aa..f3b968b 100644 --- a/src/core/project.rs +++ b/src/core/project.rs @@ -1,6 +1,7 @@ use rusqlite::Connection; use super::error::{LoreError, Result}; +use super::path_resolver::escape_like; pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { let exact = conn.query_row( @@ -106,13 +107,6 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { /// Escape LIKE metacharacters so `%` and `_` in user input are treated as /// literals. All queries using this must include `ESCAPE '\'`. -fn escape_like(input: &str) -> String { - input - .replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - #[cfg(test)] #[path = "project_tests.rs"] mod tests; diff --git a/src/search/filters.rs b/src/search/filters.rs index 83acb7f..018f059 100644 --- a/src/search/filters.rs +++ b/src/search/filters.rs @@ -1,4 +1,5 @@ use crate::core::error::Result; +use crate::core::path_resolver::escape_like; use crate::documents::SourceType; use rusqlite::Connection; @@ -43,12 +44,6 @@ impl SearchFilters { } } -fn escape_like(s: &str) -> String { - s.replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - pub fn apply_filters( conn: &Connection, document_ids: &[i64],