diff --git a/src/cli/commands/who.rs b/src/cli/commands/who.rs index 29bbb48..331c5c0 100644 --- a/src/cli/commands/who.rs +++ b/src/cli/commands/who.rs @@ -9,6 +9,9 @@ use crate::cli::robot::RobotMeta; use crate::core::config::ScoringConfig; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; +use crate::core::path_resolver::{PathQuery, build_path_query, normalize_repo_path}; +#[cfg(test)] +use crate::core::path_resolver::{SuffixResult, escape_like, suffix_probe}; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::core::time::{ms_to_iso, now_ms, parse_since, parse_since_from}; @@ -75,30 +78,6 @@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result> { )) } -/// Normalize user-supplied repo paths to match stored DiffNote paths. -/// - trims whitespace -/// - strips leading "./" and "/" (repo-relative paths) -/// - converts '\' to '/' when no '/' present (Windows paste) -/// - collapses repeated "//" -fn normalize_repo_path(input: &str) -> String { - let mut s = input.trim().to_string(); - // Windows backslash normalization (only when no forward slashes present) - if s.contains('\\') && !s.contains('/') { - s = s.replace('\\', "/"); - } - // Strip leading ./ - while s.starts_with("./") { - s = s[2..].to_string(); - } - // Strip leading / - s = s.trim_start_matches('/').to_string(); - // Collapse repeated // - while s.contains("//") { - s = s.replace("//", "/"); - } - s -} - // ─── Result Types ──────────────────────────────────────────────────────────── /// Top-level run result: carries resolved inputs + the mode-specific result. @@ -485,210 +464,6 @@ fn resolve_since_required(input: &str) -> Result { // ─── Path Query Construction ───────────────────────────────────────────────── -/// Describes how to match a user-supplied path in SQL. -#[derive(Debug)] -struct PathQuery { - /// The parameter value to bind. - value: String, - /// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`. - is_prefix: bool, -} - -/// Build a path query from a user-supplied path, with project-scoped DB probes. -/// -/// Rules: -/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE) -/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=) -/// - Else if the last path segment contains `.`, heuristic suggests file (=) -/// - Two-way DB probe (project-scoped): when heuristics are ambiguous, -/// probe the DB to resolve. -/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE) -fn build_path_query(conn: &Connection, path: &str, project_id: Option) -> Result { - let trimmed = path.trim_end_matches('/'); - let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed); - let is_root = !trimmed.contains('/'); - let forced_dir = path.ends_with('/'); - // Heuristic is now only a fallback; probes decide first when ambiguous. - let looks_like_file = !forced_dir && (is_root || last_segment.contains('.')); - - // Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped) - // Checks both new_path and old_path to support querying renamed files. - // Exact-match probes already use the partial index, but LIKE probes below - // benefit from the INDEXED BY hint (same planner issue as expert query). - let exact_exists = conn - .query_row( - "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND (position_new_path = ?1 OR position_old_path = ?1) - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![trimmed, project_id], - |_| Ok(()), - ) - .is_ok() - || conn - .query_row( - "SELECT 1 FROM mr_file_changes - WHERE (new_path = ?1 OR old_path = ?1) - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![trimmed, project_id], - |_| Ok(()), - ) - .is_ok(); - - // Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped) - // Checks both new_path and old_path to support querying renamed directories. - let prefix_exists = if !forced_dir && !exact_exists { - let escaped = escape_like(trimmed); - let pat = format!("{escaped}/%"); - conn.query_row( - "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\') - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![pat, project_id], - |_| Ok(()), - ) - .is_ok() - || conn - .query_row( - "SELECT 1 FROM mr_file_changes - WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\') - AND (?2 IS NULL OR project_id = ?2) - LIMIT 1", - rusqlite::params![pat, project_id], - |_| Ok(()), - ) - .is_ok() - } else { - false - }; - - // Probe 3: suffix match — user typed a bare filename or partial path that - // doesn't exist as-is. Search for full paths ending with /input (or equal to input). - // This handles "login.rs" matching "src/auth/login.rs". - let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file { - suffix_probe(conn, trimmed, project_id)? - } else { - SuffixResult::NotAttempted - }; - - match suffix_resolved { - SuffixResult::Unique(full_path) => Ok(PathQuery { - value: full_path, - is_prefix: false, - }), - SuffixResult::Ambiguous(candidates) => { - let list = candidates - .iter() - .map(|p| format!(" {p}")) - .collect::>() - .join("\n"); - Err(LoreError::Ambiguous(format!( - "'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}" - ))) - } - SuffixResult::NotAttempted | SuffixResult::NoMatch => { - // Original logic: exact > prefix > heuristic - let is_file = if forced_dir { - false - } else if exact_exists { - true - } else if prefix_exists { - false - } else { - looks_like_file - }; - - if is_file { - Ok(PathQuery { - value: trimmed.to_string(), - is_prefix: false, - }) - } else { - let escaped = escape_like(trimmed); - Ok(PathQuery { - value: format!("{escaped}/%"), - is_prefix: true, - }) - } - } - } -} - -/// Result of a suffix probe against the DB. -enum SuffixResult { - /// Suffix probe was not attempted (conditions not met). - NotAttempted, - /// No paths matched the suffix. - NoMatch, - /// Exactly one distinct path matched — auto-resolve. - Unique(String), - /// Multiple distinct paths matched — user must disambiguate. - Ambiguous(Vec), -} - -/// Probe both notes and mr_file_changes for paths ending with the given suffix. -/// Searches both new_path and old_path columns to support renamed file resolution. -/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list). -fn suffix_probe(conn: &Connection, suffix: &str, project_id: Option) -> Result { - let escaped = escape_like(suffix); - let suffix_pat = format!("%/{escaped}"); - - let mut stmt = conn.prepare_cached( - "SELECT DISTINCT full_path FROM ( - SELECT position_new_path AS full_path - FROM notes INDEXED BY idx_notes_diffnote_path_created - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - UNION - SELECT new_path AS full_path FROM mr_file_changes - WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - UNION - SELECT position_old_path AS full_path FROM notes - WHERE note_type = 'DiffNote' - AND is_system = 0 - AND position_old_path IS NOT NULL - AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - UNION - SELECT old_path AS full_path FROM mr_file_changes - WHERE old_path IS NOT NULL - AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2) - AND (?3 IS NULL OR project_id = ?3) - ) - ORDER BY full_path - LIMIT 11", - )?; - - let candidates: Vec = stmt - .query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| { - row.get(0) - })? - .collect::, _>>()?; - - match candidates.len() { - 0 => Ok(SuffixResult::NoMatch), - 1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())), - _ => Ok(SuffixResult::Ambiguous(candidates)), - } -} - -/// Escape LIKE metacharacters. All queries using this must include `ESCAPE '\'`. -fn escape_like(input: &str) -> String { - input - .replace('\\', "\\\\") - .replace('%', "\\%") - .replace('_', "\\_") -} - // ─── Scoring Helpers ───────────────────────────────────────────────────────── /// Exponential half-life decay: `2^(-days / half_life)`. @@ -1203,11 +978,10 @@ fn query_expert_details( .collect(); let in_clause = placeholders.join(","); - let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created"; let sql = format!( " WITH signals AS ( - -- 1. DiffNote reviewer + -- 1. DiffNote reviewer (matches both new_path and old_path for renamed files) SELECT n.author_username AS username, 'reviewer' AS role, @@ -1216,7 +990,7 @@ fn query_expert_details( m.title AS title, COUNT(*) AS note_count, MAX(n.created_at) AS last_activity - FROM notes n {notes_indexed_by} + FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id JOIN projects p ON m.project_id = p.id @@ -1224,8 +998,9 @@ fn query_expert_details( AND n.is_system = 0 AND n.author_username IS NOT NULL AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') - AND n.position_new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) AND n.author_username IN ({in_clause}) @@ -1233,7 +1008,7 @@ fn query_expert_details( UNION ALL - -- 2. DiffNote MR author + -- 2. DiffNote MR author (matches both new_path and old_path for renamed files) SELECT m.author_username AS username, 'author' AS role, @@ -1244,13 +1019,14 @@ fn query_expert_details( MAX(n.created_at) AS last_activity FROM merge_requests m JOIN discussions d ON d.merge_request_id = m.id - JOIN notes n {notes_indexed_by} ON n.discussion_id = d.id + JOIN notes n ON n.discussion_id = d.id JOIN projects p ON m.project_id = p.id WHERE n.note_type = 'DiffNote' AND n.is_system = 0 AND m.author_username IS NOT NULL - AND m.state IN ('opened','merged') - AND n.position_new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) AND m.author_username IN ({in_clause}) @@ -1258,7 +1034,7 @@ fn query_expert_details( UNION ALL - -- 3. MR author via file changes + -- 3. MR author via file changes (matches both new_path and old_path) SELECT m.author_username AS username, 'author' AS role, @@ -1271,15 +1047,16 @@ fn query_expert_details( JOIN merge_requests m ON fc.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE m.author_username IS NOT NULL - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) AND m.author_username IN ({in_clause}) UNION ALL - -- 4. MR reviewer via file changes + mr_reviewers + -- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path) SELECT r.username AS username, 'reviewer' AS role, @@ -1294,8 +1071,9 @@ fn query_expert_details( JOIN mr_reviewers r ON r.merge_request_id = m.id WHERE r.username IS NOT NULL AND (m.author_username IS NULL OR r.username != m.author_username) - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) AND r.username IN ({in_clause}) @@ -1874,50 +1652,51 @@ fn query_overlap( } else { "= ?1" }; - // Force the partial index on DiffNote queries (same rationale as expert mode). - // Without this hint SQLite picks idx_notes_system (38% of rows) instead of - // idx_notes_diffnote_path_created (9.3% of rows): measured 50-133x slower. - let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created"; + // Match both new_path and old_path to capture activity on renamed files. + // INDEXED BY removed to allow OR across path columns; overlap runs once + // per command so the minor plan difference is acceptable. let sql = format!( "SELECT username, role, touch_count, last_seen_at, mr_refs FROM ( - -- 1. DiffNote reviewer + -- 1. DiffNote reviewer (matches both new_path and old_path) SELECT n.author_username AS username, 'reviewer' AS role, COUNT(DISTINCT m.id) AS touch_count, MAX(n.created_at) AS last_seen_at, GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM notes n {notes_indexed_by} + FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE n.note_type = 'DiffNote' - AND n.position_new_path {path_op} + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.is_system = 0 AND n.author_username IS NOT NULL AND (m.author_username IS NULL OR n.author_username != m.author_username) - AND m.state IN ('opened','merged') + AND m.state IN ('opened','merged','closed') AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) GROUP BY n.author_username UNION ALL - -- 2. DiffNote MR author + -- 2. DiffNote MR author (matches both new_path and old_path) SELECT m.author_username AS username, 'author' AS role, COUNT(DISTINCT m.id) AS touch_count, MAX(n.created_at) AS last_seen_at, GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs - FROM notes n {notes_indexed_by} + FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN merge_requests m ON d.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE n.note_type = 'DiffNote' - AND n.position_new_path {path_op} + AND (n.position_new_path {path_op} + OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op})) AND n.is_system = 0 - AND m.state IN ('opened', 'merged') + AND m.state IN ('opened','merged','closed') AND m.author_username IS NOT NULL AND n.created_at >= ?2 AND (?3 IS NULL OR n.project_id = ?3) @@ -1925,7 +1704,7 @@ fn query_overlap( UNION ALL - -- 3. MR author via file changes + -- 3. MR author via file changes (matches both new_path and old_path) SELECT m.author_username AS username, 'author' AS role, @@ -1936,15 +1715,16 @@ fn query_overlap( JOIN merge_requests m ON fc.merge_request_id = m.id JOIN projects p ON m.project_id = p.id WHERE m.author_username IS NOT NULL - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) GROUP BY m.author_username UNION ALL - -- 4. MR reviewer via file changes + mr_reviewers + -- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path) SELECT r.username AS username, 'reviewer' AS role, @@ -1957,8 +1737,9 @@ fn query_overlap( JOIN mr_reviewers r ON r.merge_request_id = m.id WHERE r.username IS NOT NULL AND (m.author_username IS NULL OR r.username != m.author_username) - AND m.state IN ('opened','merged') - AND fc.new_path {path_op} + AND m.state IN ('opened','merged','closed') + AND (fc.new_path {path_op} + OR (fc.old_path IS NOT NULL AND fc.old_path {path_op})) AND m.updated_at >= ?2 AND (?3 IS NULL OR fc.project_id = ?3) GROUP BY r.username