fix(who): match old_path in detail/overlap queries, include closed MRs

Fix two asymmetries between the main expert scoring query
(build_expert_sql_v2) and the auxiliary queries (query_expert_details
and query_overlap):

1. Missing old_path matching: The main scoring query correctly
   searches both position_new_path AND position_old_path (via UNION ALL
   branches) for DiffNotes, and both new_path AND old_path for
   mr_file_changes. However, query_expert_details and query_overlap
   only checked position_new_path / fc.new_path. This caused --detail
   mode and overlap mode to silently drop activity on renamed files
   that the main scoring correctly captured — the score and detail
   table wouldn't match.

2. State filter mismatch: The main query uses
   m.state IN ('opened','merged','closed') with a closed_mr_multiplier
   to downweight closed MRs. The detail and overlap queries used
   m.state IN ('opened','merged'), completely excluding closed MRs.
   This meant detail couldn't fully explain an expert's score.

Fix: Add OR clauses for old_path in all signal branches of both
queries, and include 'closed' in the state filter. The INDEXED BY
hints are removed from the auxiliary queries (they use OR across path
columns), which is acceptable since these run once per command.

Also imports build_path_query, normalize_repo_path, and PathQuery
from the new core::path_resolver module, removing the previously
duplicated private functions (~261 lines deleted).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-13 12:37:21 -05:00
parent 1799842b59
commit 0034958faf

View File

@@ -9,6 +9,9 @@ use crate::cli::robot::RobotMeta;
use crate::core::config::ScoringConfig;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::path_resolver::{PathQuery, build_path_query, normalize_repo_path};
#[cfg(test)]
use crate::core::path_resolver::{SuffixResult, escape_like, suffix_probe};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, now_ms, parse_since, parse_since_from};
@@ -75,30 +78,6 @@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result<WhoMode<'a>> {
))
}
/// Normalize user-supplied repo paths to match stored DiffNote paths.
/// - trims whitespace
/// - strips leading "./" and "/" (repo-relative paths)
/// - converts '\' to '/' when no '/' present (Windows paste)
/// - collapses repeated "//"
fn normalize_repo_path(input: &str) -> String {
let mut s = input.trim().to_string();
// Windows backslash normalization (only when no forward slashes present)
if s.contains('\\') && !s.contains('/') {
s = s.replace('\\', "/");
}
// Strip leading ./
while s.starts_with("./") {
s = s[2..].to_string();
}
// Strip leading /
s = s.trim_start_matches('/').to_string();
// Collapse repeated //
while s.contains("//") {
s = s.replace("//", "/");
}
s
}
// ─── Result Types ────────────────────────────────────────────────────────────
/// Top-level run result: carries resolved inputs + the mode-specific result.
@@ -485,210 +464,6 @@ fn resolve_since_required(input: &str) -> Result<i64> {
// ─── Path Query Construction ─────────────────────────────────────────────────
/// Describes how to match a user-supplied path in SQL.
#[derive(Debug)]
struct PathQuery {
/// The parameter value to bind.
value: String,
/// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`.
is_prefix: bool,
}
/// Build a path query from a user-supplied path, with project-scoped DB probes.
///
/// Rules:
/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE)
/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=)
/// - Else if the last path segment contains `.`, heuristic suggests file (=)
/// - Two-way DB probe (project-scoped): when heuristics are ambiguous,
/// probe the DB to resolve.
/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE)
fn build_path_query(conn: &Connection, path: &str, project_id: Option<i64>) -> Result<PathQuery> {
let trimmed = path.trim_end_matches('/');
let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
let is_root = !trimmed.contains('/');
let forced_dir = path.ends_with('/');
// Heuristic is now only a fallback; probes decide first when ambiguous.
let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
// Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped)
// Checks both new_path and old_path to support querying renamed files.
// Exact-match probes already use the partial index, but LIKE probes below
// benefit from the INDEXED BY hint (same planner issue as expert query).
let exact_exists = conn
.query_row(
"SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path = ?1 OR position_old_path = ?1)
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![trimmed, project_id],
|_| Ok(()),
)
.is_ok()
|| conn
.query_row(
"SELECT 1 FROM mr_file_changes
WHERE (new_path = ?1 OR old_path = ?1)
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![trimmed, project_id],
|_| Ok(()),
)
.is_ok();
// Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped)
// Checks both new_path and old_path to support querying renamed directories.
let prefix_exists = if !forced_dir && !exact_exists {
let escaped = escape_like(trimmed);
let pat = format!("{escaped}/%");
conn.query_row(
"SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\')
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![pat, project_id],
|_| Ok(()),
)
.is_ok()
|| conn
.query_row(
"SELECT 1 FROM mr_file_changes
WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\')
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![pat, project_id],
|_| Ok(()),
)
.is_ok()
} else {
false
};
// Probe 3: suffix match — user typed a bare filename or partial path that
// doesn't exist as-is. Search for full paths ending with /input (or equal to input).
// This handles "login.rs" matching "src/auth/login.rs".
let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file {
suffix_probe(conn, trimmed, project_id)?
} else {
SuffixResult::NotAttempted
};
match suffix_resolved {
SuffixResult::Unique(full_path) => Ok(PathQuery {
value: full_path,
is_prefix: false,
}),
SuffixResult::Ambiguous(candidates) => {
let list = candidates
.iter()
.map(|p| format!(" {p}"))
.collect::<Vec<_>>()
.join("\n");
Err(LoreError::Ambiguous(format!(
"'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}"
)))
}
SuffixResult::NotAttempted | SuffixResult::NoMatch => {
// Original logic: exact > prefix > heuristic
let is_file = if forced_dir {
false
} else if exact_exists {
true
} else if prefix_exists {
false
} else {
looks_like_file
};
if is_file {
Ok(PathQuery {
value: trimmed.to_string(),
is_prefix: false,
})
} else {
let escaped = escape_like(trimmed);
Ok(PathQuery {
value: format!("{escaped}/%"),
is_prefix: true,
})
}
}
}
}
/// Result of a suffix probe against the DB.
enum SuffixResult {
/// Suffix probe was not attempted (conditions not met).
NotAttempted,
/// No paths matched the suffix.
NoMatch,
/// Exactly one distinct path matched — auto-resolve.
Unique(String),
/// Multiple distinct paths matched — user must disambiguate.
Ambiguous(Vec<String>),
}
/// Probe both notes and mr_file_changes for paths ending with the given suffix.
/// Searches both new_path and old_path columns to support renamed file resolution.
/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list).
fn suffix_probe(conn: &Connection, suffix: &str, project_id: Option<i64>) -> Result<SuffixResult> {
let escaped = escape_like(suffix);
let suffix_pat = format!("%/{escaped}");
let mut stmt = conn.prepare_cached(
"SELECT DISTINCT full_path FROM (
SELECT position_new_path AS full_path
FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT new_path AS full_path FROM mr_file_changes
WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT position_old_path AS full_path FROM notes
WHERE note_type = 'DiffNote'
AND is_system = 0
AND position_old_path IS NOT NULL
AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT old_path AS full_path FROM mr_file_changes
WHERE old_path IS NOT NULL
AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
)
ORDER BY full_path
LIMIT 11",
)?;
let candidates: Vec<String> = stmt
.query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| {
row.get(0)
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
match candidates.len() {
0 => Ok(SuffixResult::NoMatch),
1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())),
_ => Ok(SuffixResult::Ambiguous(candidates)),
}
}
/// Escape LIKE metacharacters. All queries using this must include `ESCAPE '\'`.
fn escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
// ─── Scoring Helpers ─────────────────────────────────────────────────────────
/// Exponential half-life decay: `2^(-days / half_life)`.
@@ -1203,11 +978,10 @@ fn query_expert_details(
.collect();
let in_clause = placeholders.join(",");
let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created";
let sql = format!(
"
WITH signals AS (
-- 1. DiffNote reviewer
-- 1. DiffNote reviewer (matches both new_path and old_path for renamed files)
SELECT
n.author_username AS username,
'reviewer' AS role,
@@ -1216,7 +990,7 @@ fn query_expert_details(
m.title AS title,
COUNT(*) AS note_count,
MAX(n.created_at) AS last_activity
FROM notes n {notes_indexed_by}
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN merge_requests m ON d.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
@@ -1224,8 +998,9 @@ fn query_expert_details(
AND n.is_system = 0
AND n.author_username IS NOT NULL
AND (m.author_username IS NULL OR n.author_username != m.author_username)
AND m.state IN ('opened','merged')
AND n.position_new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
AND n.author_username IN ({in_clause})
@@ -1233,7 +1008,7 @@ fn query_expert_details(
UNION ALL
-- 2. DiffNote MR author
-- 2. DiffNote MR author (matches both new_path and old_path for renamed files)
SELECT
m.author_username AS username,
'author' AS role,
@@ -1244,13 +1019,14 @@ fn query_expert_details(
MAX(n.created_at) AS last_activity
FROM merge_requests m
JOIN discussions d ON d.merge_request_id = m.id
JOIN notes n {notes_indexed_by} ON n.discussion_id = d.id
JOIN notes n ON n.discussion_id = d.id
JOIN projects p ON m.project_id = p.id
WHERE n.note_type = 'DiffNote'
AND n.is_system = 0
AND m.author_username IS NOT NULL
AND m.state IN ('opened','merged')
AND n.position_new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
AND m.author_username IN ({in_clause})
@@ -1258,7 +1034,7 @@ fn query_expert_details(
UNION ALL
-- 3. MR author via file changes
-- 3. MR author via file changes (matches both new_path and old_path)
SELECT
m.author_username AS username,
'author' AS role,
@@ -1271,15 +1047,16 @@ fn query_expert_details(
JOIN merge_requests m ON fc.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE m.author_username IS NOT NULL
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
AND m.author_username IN ({in_clause})
UNION ALL
-- 4. MR reviewer via file changes + mr_reviewers
-- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path)
SELECT
r.username AS username,
'reviewer' AS role,
@@ -1294,8 +1071,9 @@ fn query_expert_details(
JOIN mr_reviewers r ON r.merge_request_id = m.id
WHERE r.username IS NOT NULL
AND (m.author_username IS NULL OR r.username != m.author_username)
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
AND r.username IN ({in_clause})
@@ -1874,50 +1652,51 @@ fn query_overlap(
} else {
"= ?1"
};
// Force the partial index on DiffNote queries (same rationale as expert mode).
// Without this hint SQLite picks idx_notes_system (38% of rows) instead of
// idx_notes_diffnote_path_created (9.3% of rows): measured 50-133x slower.
let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created";
// Match both new_path and old_path to capture activity on renamed files.
// INDEXED BY removed to allow OR across path columns; overlap runs once
// per command so the minor plan difference is acceptable.
let sql = format!(
"SELECT username, role, touch_count, last_seen_at, mr_refs FROM (
-- 1. DiffNote reviewer
-- 1. DiffNote reviewer (matches both new_path and old_path)
SELECT
n.author_username AS username,
'reviewer' AS role,
COUNT(DISTINCT m.id) AS touch_count,
MAX(n.created_at) AS last_seen_at,
GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
FROM notes n {notes_indexed_by}
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN merge_requests m ON d.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE n.note_type = 'DiffNote'
AND n.position_new_path {path_op}
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.is_system = 0
AND n.author_username IS NOT NULL
AND (m.author_username IS NULL OR n.author_username != m.author_username)
AND m.state IN ('opened','merged')
AND m.state IN ('opened','merged','closed')
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
GROUP BY n.author_username
UNION ALL
-- 2. DiffNote MR author
-- 2. DiffNote MR author (matches both new_path and old_path)
SELECT
m.author_username AS username,
'author' AS role,
COUNT(DISTINCT m.id) AS touch_count,
MAX(n.created_at) AS last_seen_at,
GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
FROM notes n {notes_indexed_by}
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN merge_requests m ON d.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE n.note_type = 'DiffNote'
AND n.position_new_path {path_op}
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.is_system = 0
AND m.state IN ('opened', 'merged')
AND m.state IN ('opened','merged','closed')
AND m.author_username IS NOT NULL
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
@@ -1925,7 +1704,7 @@ fn query_overlap(
UNION ALL
-- 3. MR author via file changes
-- 3. MR author via file changes (matches both new_path and old_path)
SELECT
m.author_username AS username,
'author' AS role,
@@ -1936,15 +1715,16 @@ fn query_overlap(
JOIN merge_requests m ON fc.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE m.author_username IS NOT NULL
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
GROUP BY m.author_username
UNION ALL
-- 4. MR reviewer via file changes + mr_reviewers
-- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path)
SELECT
r.username AS username,
'reviewer' AS role,
@@ -1957,8 +1737,9 @@ fn query_overlap(
JOIN mr_reviewers r ON r.merge_request_id = m.id
WHERE r.username IS NOT NULL
AND (m.author_username IS NULL OR r.username != m.author_username)
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
GROUP BY r.username