5 Commits

Author SHA1 Message Date
teernisse
f439c42b3d chore: add gitignore for mock-seed, roam CI workflow, formatting
- Add tools/mock-seed/ to .gitignore
- Add .github/workflows/roam.yml CI workflow
- Add .roam/fitness.yaml architectural fitness rules
- Rustfmt formatting fixes in show.rs and vector.rs
- Beads sync

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:50:30 -05:00
teernisse
4f3ec72923 feat(timeline): upgrade seed phase to hybrid search
Replace FTS-only seed entity discovery with hybrid search (FTS + vector
via RRF), using the same search_hybrid infrastructure as the search
command. Falls back gracefully to FTS-only when Ollama is unavailable.

Changes:
- seed_timeline() now accepts OllamaClient, delegates to search_hybrid
- New resolve_documents_to_entities() replaces find_seed_entities()
- SeedResult gains search_mode field tracking actual mode used
- TimelineResult carries search_mode through to JSON renderer
- run_timeline wires up OllamaClient from config
- handle_timeline made async for the hybrid search await
- Tests updated for new function signatures

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:50:24 -05:00
teernisse
e6771709f1 refactor(core): extract path_resolver module, fix old_path matching in who
Extract shared path resolution logic from who.rs into a new
core::path_resolver module for cross-module reuse. Functions moved:
escape_like, normalize_repo_path, PathQuery, SuffixResult,
build_path_query, suffix_probe. Duplicate escape_like copies removed
from list.rs, project.rs, and filters.rs — all now import from
path_resolver.

Additionally fixes two bugs in query_expert_details() and
query_overlap() where only position_new_path was checked (missing
old_path matches for renamed files) and the state filter excluded
'closed' MRs even though the main scoring query includes them with a
decay multiplier.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:50:14 -05:00
Taylor Eernisse
8c86b0dfd7 release: v0.8.1 2026-02-13 11:12:31 -05:00
teernisse
6e55b2470d bugfix: DB column and size issues 2026-02-13 11:11:35 -05:00
21 changed files with 1038 additions and 371 deletions

File diff suppressed because one or more lines are too long

21
.github/workflows/roam.yml vendored Normal file
View File

@@ -0,0 +1,21 @@
name: Roam Code Analysis
on:
pull_request:
branches: [main, master]
permissions:
contents: read
pull-requests: write
jobs:
roam:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- run: pip install roam-code
- run: roam index
- run: roam fitness
- run: roam pr-risk --json

3
.gitignore vendored
View File

@@ -41,6 +41,9 @@ lore.config.json
*.db-shm
# Mock seed data
tools/mock-seed/
# Added by cargo
/target

11
.roam/fitness.yaml Normal file
View File

@@ -0,0 +1,11 @@
rules:
- name: No circular imports in core
type: dependency
source: "src/**"
forbidden_target: "tests/**"
reason: "Production code should not import test modules"
- name: Complexity threshold
type: metric
metric: cognitive_complexity
threshold: 30
reason: "Functions above 30 cognitive complexity need refactoring"

2
Cargo.lock generated
View File

@@ -1106,7 +1106,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "lore"
version = "0.8.0"
version = "0.8.1"
dependencies = [
"async-stream",
"chrono",

View File

@@ -1,6 +1,6 @@
[package]
name = "lore"
version = "0.8.0"
version = "0.8.1"
edition = "2024"
description = "Gitlore - Local GitLab data management with semantic search"
authors = ["Taylor Eernisse"]

View File

@@ -6,6 +6,7 @@ use crate::Config;
use crate::cli::robot::{RobotMeta, expand_fields_preset, filter_fields};
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::path_resolver::escape_like as note_escape_like;
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, now_ms, parse_since};
@@ -1257,13 +1258,6 @@ pub struct NoteListFilters {
pub order: String,
}
fn note_escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
pub fn query_notes(
conn: &Connection,
filters: &NoteListFilters,

View File

@@ -160,6 +160,7 @@ pub fn run_show_issue(
})
}
#[derive(Debug)]
struct IssueRow {
id: i64,
iid: i64,
@@ -194,7 +195,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
i.due_date, i.milestone_title,
(SELECT COUNT(*) FROM notes n
JOIN discussions d ON n.discussion_id = d.id
WHERE d.noteable_type = 'Issue' AND d.noteable_id = i.id AND n.is_system = 0) AS user_notes_count,
WHERE d.noteable_type = 'Issue' AND d.issue_id = i.id AND n.is_system = 0) AS user_notes_count,
i.status_name, i.status_category, i.status_color,
i.status_icon_name, i.status_synced_at
FROM issues i
@@ -210,7 +211,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
i.due_date, i.milestone_title,
(SELECT COUNT(*) FROM notes n
JOIN discussions d ON n.discussion_id = d.id
WHERE d.noteable_type = 'Issue' AND d.noteable_id = i.id AND n.is_system = 0) AS user_notes_count,
WHERE d.noteable_type = 'Issue' AND d.issue_id = i.id AND n.is_system = 0) AS user_notes_count,
i.status_name, i.status_category, i.status_color,
i.status_icon_name, i.status_synced_at
FROM issues i
@@ -1218,6 +1219,172 @@ mod tests {
.unwrap();
}
fn seed_second_project(conn: &Connection) {
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (2, 101, 'other/repo', 'https://gitlab.example.com/other', 1000, 2000)",
[],
)
.unwrap();
}
fn seed_discussion_with_notes(
conn: &Connection,
issue_id: i64,
project_id: i64,
user_notes: usize,
system_notes: usize,
) {
let disc_id: i64 = conn
.query_row(
"SELECT COALESCE(MAX(id), 0) + 1 FROM discussions",
[],
|r| r.get(0),
)
.unwrap();
conn.execute(
"INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, first_note_at, last_note_at, last_seen_at)
VALUES (?1, ?2, ?3, ?4, 'Issue', 1000, 2000, 2000)",
rusqlite::params![disc_id, format!("disc-{}", disc_id), project_id, issue_id],
)
.unwrap();
for i in 0..user_notes {
conn.execute(
"INSERT INTO notes (gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position)
VALUES (?1, ?2, ?3, 'user1', 'comment', 1000, 2000, 2000, 0, ?4)",
rusqlite::params![1000 + disc_id * 100 + i as i64, disc_id, project_id, i as i64],
)
.unwrap();
}
for i in 0..system_notes {
conn.execute(
"INSERT INTO notes (gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position)
VALUES (?1, ?2, ?3, 'system', 'status changed', 1000, 2000, 2000, 1, ?4)",
rusqlite::params![2000 + disc_id * 100 + i as i64, disc_id, project_id, (user_notes + i) as i64],
)
.unwrap();
}
}
// --- find_issue tests ---
#[test]
fn test_find_issue_basic() {
let conn = setup_test_db();
seed_issue(&conn);
let row = find_issue(&conn, 10, None).unwrap();
assert_eq!(row.iid, 10);
assert_eq!(row.title, "Test issue");
assert_eq!(row.state, "opened");
assert_eq!(row.author_username, "author");
assert_eq!(row.project_path, "group/repo");
}
#[test]
fn test_find_issue_with_project_filter() {
let conn = setup_test_db();
seed_issue(&conn);
let row = find_issue(&conn, 10, Some("group/repo")).unwrap();
assert_eq!(row.iid, 10);
assert_eq!(row.project_path, "group/repo");
}
#[test]
fn test_find_issue_not_found() {
let conn = setup_test_db();
seed_issue(&conn);
let err = find_issue(&conn, 999, None).unwrap_err();
assert!(matches!(err, LoreError::NotFound(_)));
}
#[test]
fn test_find_issue_wrong_project_filter() {
let conn = setup_test_db();
seed_issue(&conn);
seed_second_project(&conn);
// Issue 10 only exists in project 1, not project 2
let err = find_issue(&conn, 10, Some("other/repo")).unwrap_err();
assert!(matches!(err, LoreError::NotFound(_)));
}
#[test]
fn test_find_issue_ambiguous_without_project() {
let conn = setup_test_db();
seed_issue(&conn); // issue iid=10 in project 1
seed_second_project(&conn);
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (2, 201, 10, 2, 'Same iid different project', 'opened', 'author', 1000, 2000, 2000)",
[],
)
.unwrap();
let err = find_issue(&conn, 10, None).unwrap_err();
assert!(matches!(err, LoreError::Ambiguous(_)));
}
#[test]
fn test_find_issue_ambiguous_resolved_with_project() {
let conn = setup_test_db();
seed_issue(&conn);
seed_second_project(&conn);
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (2, 201, 10, 2, 'Same iid different project', 'opened', 'author', 1000, 2000, 2000)",
[],
)
.unwrap();
let row = find_issue(&conn, 10, Some("other/repo")).unwrap();
assert_eq!(row.title, "Same iid different project");
}
#[test]
fn test_find_issue_user_notes_count_zero() {
let conn = setup_test_db();
seed_issue(&conn);
let row = find_issue(&conn, 10, None).unwrap();
assert_eq!(row.user_notes_count, 0);
}
#[test]
fn test_find_issue_user_notes_count_excludes_system() {
let conn = setup_test_db();
seed_issue(&conn);
// 2 user notes + 3 system notes = should count only 2
seed_discussion_with_notes(&conn, 1, 1, 2, 3);
let row = find_issue(&conn, 10, None).unwrap();
assert_eq!(row.user_notes_count, 2);
}
#[test]
fn test_find_issue_user_notes_count_across_discussions() {
let conn = setup_test_db();
seed_issue(&conn);
seed_discussion_with_notes(&conn, 1, 1, 3, 0); // 3 user notes
seed_discussion_with_notes(&conn, 1, 1, 1, 2); // 1 user note + 2 system
let row = find_issue(&conn, 10, None).unwrap();
assert_eq!(row.user_notes_count, 4);
}
#[test]
fn test_find_issue_notes_count_ignores_other_issues() {
let conn = setup_test_db();
seed_issue(&conn);
// Add a second issue
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (2, 201, 20, 1, 'Other issue', 'opened', 'author', 1000, 2000, 2000)",
[],
)
.unwrap();
// Notes on issue 2, not issue 1
seed_discussion_with_notes(&conn, 2, 1, 5, 0);
let row = find_issue(&conn, 10, None).unwrap();
assert_eq!(row.user_notes_count, 0); // Issue 10 has no notes
}
#[test]
fn test_ansi256_from_rgb() {
assert_eq!(ansi256_from_rgb(0, 0, 0), 16);

View File

@@ -13,6 +13,7 @@ use crate::core::timeline::{
use crate::core::timeline_collect::collect_events;
use crate::core::timeline_expand::expand_timeline;
use crate::core::timeline_seed::seed_timeline;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
/// Parameters for running the timeline pipeline.
pub struct TimelineParams {
@@ -28,7 +29,7 @@ pub struct TimelineParams {
}
/// Run the full timeline pipeline: SEED -> EXPAND -> COLLECT.
pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result<TimelineResult> {
pub async fn run_timeline(config: &Config, params: &TimelineParams) -> Result<TimelineResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
@@ -50,15 +51,25 @@ pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result<Timeline
})
.transpose()?;
// Stage 1+2: SEED + HYDRATE
// Construct OllamaClient for hybrid search (same pattern as run_search)
let ollama_cfg = &config.embedding;
let client = OllamaClient::new(OllamaConfig {
base_url: ollama_cfg.base_url.clone(),
model: ollama_cfg.model.clone(),
..OllamaConfig::default()
});
// Stage 1+2: SEED + HYDRATE (hybrid search with FTS fallback)
let seed_result = seed_timeline(
&conn,
Some(&client),
&params.query,
project_id,
since_ms,
params.max_seeds,
params.max_evidence,
)?;
)
.await?;
// Stage 3: EXPAND
let expand_result = expand_timeline(
@@ -81,6 +92,7 @@ pub fn run_timeline(config: &Config, params: &TimelineParams) -> Result<Timeline
Ok(TimelineResult {
query: params.query.clone(),
search_mode: seed_result.search_mode,
events,
total_events_before_limit: total_before_limit,
seed_entities: seed_result.seed_entities,
@@ -258,7 +270,7 @@ pub fn print_timeline_json_with_meta(
ok: true,
data: TimelineDataJson::from_result(result),
meta: TimelineMetaJson {
search_mode: "lexical".to_owned(),
search_mode: result.search_mode.clone(),
expansion_depth: depth,
expand_mentions,
total_entities: result.seed_entities.len() + result.expanded_entities.len(),

View File

@@ -9,6 +9,9 @@ use crate::cli::robot::RobotMeta;
use crate::core::config::ScoringConfig;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::path_resolver::{PathQuery, build_path_query, normalize_repo_path};
#[cfg(test)]
use crate::core::path_resolver::{SuffixResult, escape_like, suffix_probe};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, now_ms, parse_since, parse_since_from};
@@ -75,30 +78,6 @@ fn resolve_mode<'a>(args: &'a WhoArgs) -> Result<WhoMode<'a>> {
))
}
/// Normalize user-supplied repo paths to match stored DiffNote paths.
/// - trims whitespace
/// - strips leading "./" and "/" (repo-relative paths)
/// - converts '\' to '/' when no '/' present (Windows paste)
/// - collapses repeated "//"
fn normalize_repo_path(input: &str) -> String {
let mut s = input.trim().to_string();
// Windows backslash normalization (only when no forward slashes present)
if s.contains('\\') && !s.contains('/') {
s = s.replace('\\', "/");
}
// Strip leading ./
while s.starts_with("./") {
s = s[2..].to_string();
}
// Strip leading /
s = s.trim_start_matches('/').to_string();
// Collapse repeated //
while s.contains("//") {
s = s.replace("//", "/");
}
s
}
// ─── Result Types ────────────────────────────────────────────────────────────
/// Top-level run result: carries resolved inputs + the mode-specific result.
@@ -485,210 +464,6 @@ fn resolve_since_required(input: &str) -> Result<i64> {
// ─── Path Query Construction ─────────────────────────────────────────────────
/// Describes how to match a user-supplied path in SQL.
#[derive(Debug)]
struct PathQuery {
/// The parameter value to bind.
value: String,
/// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`.
is_prefix: bool,
}
/// Build a path query from a user-supplied path, with project-scoped DB probes.
///
/// Rules:
/// - If the path ends with `/`, it's a directory prefix -> `escaped_path/%` (LIKE)
/// - If the path is a root path (no `/`) and does NOT end with `/`, treat as exact (=)
/// - Else if the last path segment contains `.`, heuristic suggests file (=)
/// - Two-way DB probe (project-scoped): when heuristics are ambiguous,
/// probe the DB to resolve.
/// - Otherwise, treat as directory prefix -> `escaped_path/%` (LIKE)
fn build_path_query(conn: &Connection, path: &str, project_id: Option<i64>) -> Result<PathQuery> {
let trimmed = path.trim_end_matches('/');
let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
let is_root = !trimmed.contains('/');
let forced_dir = path.ends_with('/');
// Heuristic is now only a fallback; probes decide first when ambiguous.
let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
// Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped)
// Checks both new_path and old_path to support querying renamed files.
// Exact-match probes already use the partial index, but LIKE probes below
// benefit from the INDEXED BY hint (same planner issue as expert query).
let exact_exists = conn
.query_row(
"SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path = ?1 OR position_old_path = ?1)
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![trimmed, project_id],
|_| Ok(()),
)
.is_ok()
|| conn
.query_row(
"SELECT 1 FROM mr_file_changes
WHERE (new_path = ?1 OR old_path = ?1)
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![trimmed, project_id],
|_| Ok(()),
)
.is_ok();
// Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped)
// Checks both new_path and old_path to support querying renamed directories.
let prefix_exists = if !forced_dir && !exact_exists {
let escaped = escape_like(trimmed);
let pat = format!("{escaped}/%");
conn.query_row(
"SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\')
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![pat, project_id],
|_| Ok(()),
)
.is_ok()
|| conn
.query_row(
"SELECT 1 FROM mr_file_changes
WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\')
AND (?2 IS NULL OR project_id = ?2)
LIMIT 1",
rusqlite::params![pat, project_id],
|_| Ok(()),
)
.is_ok()
} else {
false
};
// Probe 3: suffix match — user typed a bare filename or partial path that
// doesn't exist as-is. Search for full paths ending with /input (or equal to input).
// This handles "login.rs" matching "src/auth/login.rs".
let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file {
suffix_probe(conn, trimmed, project_id)?
} else {
SuffixResult::NotAttempted
};
match suffix_resolved {
SuffixResult::Unique(full_path) => Ok(PathQuery {
value: full_path,
is_prefix: false,
}),
SuffixResult::Ambiguous(candidates) => {
let list = candidates
.iter()
.map(|p| format!(" {p}"))
.collect::<Vec<_>>()
.join("\n");
Err(LoreError::Ambiguous(format!(
"'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}"
)))
}
SuffixResult::NotAttempted | SuffixResult::NoMatch => {
// Original logic: exact > prefix > heuristic
let is_file = if forced_dir {
false
} else if exact_exists {
true
} else if prefix_exists {
false
} else {
looks_like_file
};
if is_file {
Ok(PathQuery {
value: trimmed.to_string(),
is_prefix: false,
})
} else {
let escaped = escape_like(trimmed);
Ok(PathQuery {
value: format!("{escaped}/%"),
is_prefix: true,
})
}
}
}
}
/// Result of a suffix probe against the DB.
enum SuffixResult {
/// Suffix probe was not attempted (conditions not met).
NotAttempted,
/// No paths matched the suffix.
NoMatch,
/// Exactly one distinct path matched — auto-resolve.
Unique(String),
/// Multiple distinct paths matched — user must disambiguate.
Ambiguous(Vec<String>),
}
/// Probe both notes and mr_file_changes for paths ending with the given suffix.
/// Searches both new_path and old_path columns to support renamed file resolution.
/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list).
fn suffix_probe(conn: &Connection, suffix: &str, project_id: Option<i64>) -> Result<SuffixResult> {
let escaped = escape_like(suffix);
let suffix_pat = format!("%/{escaped}");
let mut stmt = conn.prepare_cached(
"SELECT DISTINCT full_path FROM (
SELECT position_new_path AS full_path
FROM notes INDEXED BY idx_notes_diffnote_path_created
WHERE note_type = 'DiffNote'
AND is_system = 0
AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT new_path AS full_path FROM mr_file_changes
WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT position_old_path AS full_path FROM notes
WHERE note_type = 'DiffNote'
AND is_system = 0
AND position_old_path IS NOT NULL
AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
UNION
SELECT old_path AS full_path FROM mr_file_changes
WHERE old_path IS NOT NULL
AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2)
AND (?3 IS NULL OR project_id = ?3)
)
ORDER BY full_path
LIMIT 11",
)?;
let candidates: Vec<String> = stmt
.query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| {
row.get(0)
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
match candidates.len() {
0 => Ok(SuffixResult::NoMatch),
1 => Ok(SuffixResult::Unique(candidates.into_iter().next().unwrap())),
_ => Ok(SuffixResult::Ambiguous(candidates)),
}
}
/// Escape LIKE metacharacters. All queries using this must include `ESCAPE '\'`.
fn escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
// ─── Scoring Helpers ─────────────────────────────────────────────────────────
/// Exponential half-life decay: `2^(-days / half_life)`.
@@ -1203,11 +978,10 @@ fn query_expert_details(
.collect();
let in_clause = placeholders.join(",");
let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created";
let sql = format!(
"
WITH signals AS (
-- 1. DiffNote reviewer
-- 1. DiffNote reviewer (matches both new_path and old_path for renamed files)
SELECT
n.author_username AS username,
'reviewer' AS role,
@@ -1216,7 +990,7 @@ fn query_expert_details(
m.title AS title,
COUNT(*) AS note_count,
MAX(n.created_at) AS last_activity
FROM notes n {notes_indexed_by}
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN merge_requests m ON d.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
@@ -1224,8 +998,9 @@ fn query_expert_details(
AND n.is_system = 0
AND n.author_username IS NOT NULL
AND (m.author_username IS NULL OR n.author_username != m.author_username)
AND m.state IN ('opened','merged')
AND n.position_new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
AND n.author_username IN ({in_clause})
@@ -1233,7 +1008,7 @@ fn query_expert_details(
UNION ALL
-- 2. DiffNote MR author
-- 2. DiffNote MR author (matches both new_path and old_path for renamed files)
SELECT
m.author_username AS username,
'author' AS role,
@@ -1244,13 +1019,14 @@ fn query_expert_details(
MAX(n.created_at) AS last_activity
FROM merge_requests m
JOIN discussions d ON d.merge_request_id = m.id
JOIN notes n {notes_indexed_by} ON n.discussion_id = d.id
JOIN notes n ON n.discussion_id = d.id
JOIN projects p ON m.project_id = p.id
WHERE n.note_type = 'DiffNote'
AND n.is_system = 0
AND m.author_username IS NOT NULL
AND m.state IN ('opened','merged')
AND n.position_new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
AND m.author_username IN ({in_clause})
@@ -1258,7 +1034,7 @@ fn query_expert_details(
UNION ALL
-- 3. MR author via file changes
-- 3. MR author via file changes (matches both new_path and old_path)
SELECT
m.author_username AS username,
'author' AS role,
@@ -1271,15 +1047,16 @@ fn query_expert_details(
JOIN merge_requests m ON fc.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE m.author_username IS NOT NULL
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
AND m.author_username IN ({in_clause})
UNION ALL
-- 4. MR reviewer via file changes + mr_reviewers
-- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path)
SELECT
r.username AS username,
'reviewer' AS role,
@@ -1294,8 +1071,9 @@ fn query_expert_details(
JOIN mr_reviewers r ON r.merge_request_id = m.id
WHERE r.username IS NOT NULL
AND (m.author_username IS NULL OR r.username != m.author_username)
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
AND r.username IN ({in_clause})
@@ -1874,50 +1652,51 @@ fn query_overlap(
} else {
"= ?1"
};
// Force the partial index on DiffNote queries (same rationale as expert mode).
// Without this hint SQLite picks idx_notes_system (38% of rows) instead of
// idx_notes_diffnote_path_created (9.3% of rows): measured 50-133x slower.
let notes_indexed_by = "INDEXED BY idx_notes_diffnote_path_created";
// Match both new_path and old_path to capture activity on renamed files.
// INDEXED BY removed to allow OR across path columns; overlap runs once
// per command so the minor plan difference is acceptable.
let sql = format!(
"SELECT username, role, touch_count, last_seen_at, mr_refs FROM (
-- 1. DiffNote reviewer
-- 1. DiffNote reviewer (matches both new_path and old_path)
SELECT
n.author_username AS username,
'reviewer' AS role,
COUNT(DISTINCT m.id) AS touch_count,
MAX(n.created_at) AS last_seen_at,
GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
FROM notes n {notes_indexed_by}
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN merge_requests m ON d.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE n.note_type = 'DiffNote'
AND n.position_new_path {path_op}
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.is_system = 0
AND n.author_username IS NOT NULL
AND (m.author_username IS NULL OR n.author_username != m.author_username)
AND m.state IN ('opened','merged')
AND m.state IN ('opened','merged','closed')
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
GROUP BY n.author_username
UNION ALL
-- 2. DiffNote MR author
-- 2. DiffNote MR author (matches both new_path and old_path)
SELECT
m.author_username AS username,
'author' AS role,
COUNT(DISTINCT m.id) AS touch_count,
MAX(n.created_at) AS last_seen_at,
GROUP_CONCAT(DISTINCT (p.path_with_namespace || '!' || m.iid)) AS mr_refs
FROM notes n {notes_indexed_by}
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN merge_requests m ON d.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE n.note_type = 'DiffNote'
AND n.position_new_path {path_op}
AND (n.position_new_path {path_op}
OR (n.position_old_path IS NOT NULL AND n.position_old_path {path_op}))
AND n.is_system = 0
AND m.state IN ('opened', 'merged')
AND m.state IN ('opened','merged','closed')
AND m.author_username IS NOT NULL
AND n.created_at >= ?2
AND (?3 IS NULL OR n.project_id = ?3)
@@ -1925,7 +1704,7 @@ fn query_overlap(
UNION ALL
-- 3. MR author via file changes
-- 3. MR author via file changes (matches both new_path and old_path)
SELECT
m.author_username AS username,
'author' AS role,
@@ -1936,15 +1715,16 @@ fn query_overlap(
JOIN merge_requests m ON fc.merge_request_id = m.id
JOIN projects p ON m.project_id = p.id
WHERE m.author_username IS NOT NULL
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
GROUP BY m.author_username
UNION ALL
-- 4. MR reviewer via file changes + mr_reviewers
-- 4. MR reviewer via file changes + mr_reviewers (matches both new_path and old_path)
SELECT
r.username AS username,
'reviewer' AS role,
@@ -1957,8 +1737,9 @@ fn query_overlap(
JOIN mr_reviewers r ON r.merge_request_id = m.id
WHERE r.username IS NOT NULL
AND (m.author_username IS NULL OR r.username != m.author_username)
AND m.state IN ('opened','merged')
AND fc.new_path {path_op}
AND m.state IN ('opened','merged','closed')
AND (fc.new_path {path_op}
OR (fc.old_path IS NOT NULL AND fc.old_path {path_op}))
AND m.updated_at >= ?2
AND (?3 IS NULL OR fc.project_id = ?3)
GROUP BY r.username

View File

@@ -9,6 +9,7 @@ pub mod lock;
pub mod logging;
pub mod metrics;
pub mod note_parser;
pub mod path_resolver;
pub mod paths;
pub mod payloads;
pub mod project;

244
src/core/path_resolver.rs Normal file
View File

@@ -0,0 +1,244 @@
use rusqlite::Connection;
use super::error::{LoreError, Result};
// ─── SQL Helpers ─────────────────────────────────────────────────────────────
/// Escape LIKE metacharacters (`%`, `_`, `\`).
/// All queries using this must include `ESCAPE '\'`.
pub fn escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
/// Normalize user-supplied repo paths to match stored DiffNote / file-change paths.
/// - trims whitespace
/// - strips leading "./" and "/" (repo-relative paths)
/// - converts '\' to '/' when no '/' present (Windows paste)
/// - collapses repeated "//"
pub fn normalize_repo_path(input: &str) -> String {
let mut s = input.trim().to_string();
// Windows backslash normalization (only when no forward slashes present)
if s.contains('\\') && !s.contains('/') {
s = s.replace('\\', "/");
}
// Strip leading ./
while s.starts_with("./") {
s = s[2..].to_string();
}
// Strip leading /
s = s.trim_start_matches('/').to_string();
// Collapse repeated //
while s.contains("//") {
s = s.replace("//", "/");
}
s
}
// ─── Path Query Resolution ──────────────────────────────────────────────────
/// Describes how to match a user-supplied path in SQL.
#[derive(Debug)]
pub struct PathQuery {
/// The parameter value to bind.
pub value: String,
/// If true: use `LIKE value ESCAPE '\'`. If false: use `= value`.
pub is_prefix: bool,
}
/// Result of a suffix probe against the DB.
pub enum SuffixResult {
/// Suffix probe was not attempted (conditions not met).
NotAttempted,
/// No paths matched the suffix.
NoMatch,
/// Exactly one distinct path matched — auto-resolve.
Unique(String),
/// Multiple distinct paths matched — user must disambiguate.
Ambiguous(Vec<String>),
}
/// Build a path query from a user-supplied path, with project-scoped DB probes.
///
/// Resolution strategy (in priority order):
/// 1. Trailing `/` → directory prefix (LIKE `path/%`)
/// 2. Exact match probe against notes + `mr_file_changes` → exact (= `path`)
/// 3. Directory prefix probe → prefix (LIKE `path/%`)
/// 4. Suffix probe for bare filenames → auto-resolve or ambiguity error
/// 5. Heuristic fallback: `.` in last segment → file, else → directory prefix
pub fn build_path_query(
    conn: &Connection,
    path: &str,
    project_id: Option<i64>,
) -> Result<PathQuery> {
    // A trailing '/' is stripped for matching, but remembered below as an
    // explicit "treat this as a directory" request from the caller.
    let trimmed = path.trim_end_matches('/');
    let last_segment = trimmed.rsplit('/').next().unwrap_or(trimmed);
    let is_root = !trimmed.contains('/');
    let forced_dir = path.ends_with('/');
    // Heuristic is now only a fallback; probes decide first when ambiguous.
    // Bare root-level names ("Makefile") and dotted tails ("login.rs") are
    // presumed files when no probe overrides the guess.
    let looks_like_file = !forced_dir && (is_root || last_segment.contains('.'));
    // Probe 1: exact file exists in DiffNotes OR mr_file_changes (project-scoped)
    // Checks both new_path and old_path to support querying renamed files.
    let exact_exists = conn
        .query_row(
            "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
             WHERE note_type = 'DiffNote'
               AND is_system = 0
               AND (position_new_path = ?1 OR position_old_path = ?1)
               AND (?2 IS NULL OR project_id = ?2)
             LIMIT 1",
            rusqlite::params![trimmed, project_id],
            |_| Ok(()),
        )
        .is_ok()
        || conn
            .query_row(
                "SELECT 1 FROM mr_file_changes
                 WHERE (new_path = ?1 OR old_path = ?1)
                   AND (?2 IS NULL OR project_id = ?2)
                 LIMIT 1",
                rusqlite::params![trimmed, project_id],
                |_| Ok(()),
            )
            .is_ok();
    // Probe 2: directory prefix exists in DiffNotes OR mr_file_changes (project-scoped)
    // Skipped when forced_dir (answer already known) or when probe 1 hit.
    let prefix_exists = if !forced_dir && !exact_exists {
        // The input becomes a LIKE prefix, so escape its metacharacters first.
        let escaped = escape_like(trimmed);
        let pat = format!("{escaped}/%");
        conn.query_row(
            "SELECT 1 FROM notes INDEXED BY idx_notes_diffnote_path_created
             WHERE note_type = 'DiffNote'
               AND is_system = 0
               AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_old_path LIKE ?1 ESCAPE '\\')
               AND (?2 IS NULL OR project_id = ?2)
             LIMIT 1",
            rusqlite::params![pat, project_id],
            |_| Ok(()),
        )
        .is_ok()
            || conn
                .query_row(
                    "SELECT 1 FROM mr_file_changes
                     WHERE (new_path LIKE ?1 ESCAPE '\\' OR old_path LIKE ?1 ESCAPE '\\')
                       AND (?2 IS NULL OR project_id = ?2)
                     LIMIT 1",
                    rusqlite::params![pat, project_id],
                    |_| Ok(()),
                )
                .is_ok()
    } else {
        false
    };
    // Probe 3: suffix match — user typed a bare filename or partial path that
    // doesn't exist as-is. Search for full paths ending with /input (or equal to input).
    // This handles "login.rs" matching "src/auth/login.rs".
    // Only attempted for file-looking inputs that both earlier probes missed.
    let suffix_resolved = if !forced_dir && !exact_exists && !prefix_exists && looks_like_file {
        suffix_probe(conn, trimmed, project_id)?
    } else {
        SuffixResult::NotAttempted
    };
    match suffix_resolved {
        // A unique suffix hit auto-resolves to the stored full path.
        SuffixResult::Unique(full_path) => Ok(PathQuery {
            value: full_path,
            is_prefix: false,
        }),
        // Multiple hits: refuse to guess — silently picking one candidate
        // would produce misleading results. List the options for the user.
        SuffixResult::Ambiguous(candidates) => {
            let list = candidates
                .iter()
                .map(|p| format!("  {p}"))
                .collect::<Vec<_>>()
                .join("\n");
            Err(LoreError::Ambiguous(format!(
                "'{trimmed}' matches multiple paths. Use the full path or -p to scope:\n{list}"
            )))
        }
        SuffixResult::NotAttempted | SuffixResult::NoMatch => {
            // Original logic: exact > prefix > heuristic
            let is_file = if forced_dir {
                false
            } else if exact_exists {
                true
            } else if prefix_exists {
                false
            } else {
                looks_like_file
            };
            if is_file {
                // Exact values are compared with '=', so no LIKE escaping here.
                Ok(PathQuery {
                    value: trimmed.to_string(),
                    is_prefix: false,
                })
            } else {
                // Prefix values feed a LIKE pattern, so escape metacharacters.
                let escaped = escape_like(trimmed);
                Ok(PathQuery {
                    value: format!("{escaped}/%"),
                    is_prefix: true,
                })
            }
        }
    }
}
/// Probe both notes and mr_file_changes for paths ending with the given suffix.
/// Searches both new_path and old_path columns to support renamed file resolution.
/// Returns up to 11 distinct candidates (enough to detect ambiguity + show a useful list).
pub fn suffix_probe(
    conn: &Connection,
    suffix: &str,
    project_id: Option<i64>,
) -> Result<SuffixResult> {
    // "%/<suffix>" matches the suffix anywhere below the root; a bare
    // root-level path is covered separately by the equality bind (?2).
    let suffix_pat = format!("%/{}", escape_like(suffix));
    let mut stmt = conn.prepare_cached(
        "SELECT DISTINCT full_path FROM (
            SELECT position_new_path AS full_path
            FROM notes INDEXED BY idx_notes_diffnote_path_created
            WHERE note_type = 'DiffNote'
              AND is_system = 0
              AND (position_new_path LIKE ?1 ESCAPE '\\' OR position_new_path = ?2)
              AND (?3 IS NULL OR project_id = ?3)
            UNION
            SELECT new_path AS full_path FROM mr_file_changes
            WHERE (new_path LIKE ?1 ESCAPE '\\' OR new_path = ?2)
              AND (?3 IS NULL OR project_id = ?3)
            UNION
            SELECT position_old_path AS full_path FROM notes
            WHERE note_type = 'DiffNote'
              AND is_system = 0
              AND position_old_path IS NOT NULL
              AND (position_old_path LIKE ?1 ESCAPE '\\' OR position_old_path = ?2)
              AND (?3 IS NULL OR project_id = ?3)
            UNION
            SELECT old_path AS full_path FROM mr_file_changes
            WHERE old_path IS NOT NULL
              AND (old_path LIKE ?1 ESCAPE '\\' OR old_path = ?2)
              AND (?3 IS NULL OR project_id = ?3)
        )
        ORDER BY full_path
        LIMIT 11",
    )?;
    let rows = stmt.query_map(rusqlite::params![suffix_pat, suffix, project_id], |row| {
        row.get::<_, String>(0)
    })?;
    let mut candidates: Vec<String> = Vec::new();
    for path in rows {
        candidates.push(path?);
    }
    // 0 → no match, 1 → unambiguous resolution, 2+ → caller must disambiguate.
    if candidates.is_empty() {
        Ok(SuffixResult::NoMatch)
    } else if candidates.len() == 1 {
        Ok(SuffixResult::Unique(candidates.swap_remove(0)))
    } else {
        Ok(SuffixResult::Ambiguous(candidates))
    }
}
#[cfg(test)]
#[path = "path_resolver_tests.rs"]
mod tests;

View File

@@ -0,0 +1,290 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Fresh in-memory SQLite database with the full migration set applied.
fn setup_test_db() -> Connection {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&conn).unwrap();
    conn
}
/// Insert a minimal project row; `id` is reused as the GitLab project id.
fn seed_project(conn: &Connection, id: i64) {
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (?1, ?1, 'group/repo', 'https://gl.example.com/group/repo', 1000, 2000)",
        rusqlite::params![id],
    )
    .unwrap();
}
/// Insert a minimal merged MR so rows that reference it (notes, file
/// changes) have a valid parent. `mr_id` is reused as gitlab_id and iid.
fn seed_mr(conn: &Connection, mr_id: i64, project_id: i64) {
    conn.execute(
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, \
         created_at, updated_at, last_seen_at, source_branch, target_branch)
         VALUES (?1, ?1, ?1, ?2, 'MR', 'merged', 1000, 2000, 2000, 'feat', 'main')",
        rusqlite::params![mr_id, project_id],
    )
    .unwrap();
}
/// Insert an mr_file_changes row for `path` (new_path only, type 'modified').
fn seed_file_change(conn: &Connection, mr_id: i64, project_id: i64, path: &str) {
    conn.execute(
        "INSERT INTO mr_file_changes (merge_request_id, project_id, new_path, change_type)
         VALUES (?1, ?2, ?3, 'modified')",
        rusqlite::params![mr_id, project_id, path],
    )
    .unwrap();
}
/// Insert a non-system DiffNote anchored at `path`, plus the parent
/// discussion it requires (attached to mr_id=1). `id` is reused for both
/// the discussion and the note, so callers must keep ids unique.
fn seed_diffnote(conn: &Connection, id: i64, project_id: i64, path: &str) {
    // Need a discussion first (MergeRequest type, linked to mr_id=1)
    conn.execute(
        "INSERT OR IGNORE INTO discussions (id, gitlab_discussion_id, project_id, \
         merge_request_id, noteable_type, resolvable, resolved, last_seen_at, last_note_at)
         VALUES (?1, ?2, ?3, 1, 'MergeRequest', 1, 0, 2000, 2000)",
        rusqlite::params![id, format!("disc-{id}"), project_id],
    )
    .unwrap();
    // The note itself: is_system = 0 and note_type = 'DiffNote' are what the
    // path probes in build_path_query filter on.
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, note_type, is_system, \
         author_username, body, created_at, updated_at, last_seen_at, position_new_path)
         VALUES (?1, ?1, ?1, ?2, 'DiffNote', 0, 'user', 'note', 1000, 2000, 2000, ?3)",
        rusqlite::params![id, project_id, path],
    )
    .unwrap();
}
// ─── escape_like ─────────────────────────────────────────────────────────────
/// LIKE metacharacters (`%`, `_`) and the escape character itself must be
/// escaped; all other characters pass through untouched.
#[test]
fn test_escape_like() {
    assert_eq!(escape_like("normal/path"), "normal/path");
    assert_eq!(escape_like("has_underscore"), "has\\_underscore");
    assert_eq!(escape_like("has%percent"), "has\\%percent");
    assert_eq!(escape_like("has\\backslash"), "has\\\\backslash");
    // Edge cases previously uncovered: empty input and stacked metacharacters
    // (backslash must be escaped first, or the other escapes get doubled).
    assert_eq!(escape_like(""), "");
    assert_eq!(escape_like("a_%\\b"), "a\\_\\%\\\\b");
}
// ─── normalize_repo_path ─────────────────────────────────────────────────────
#[test]
fn test_normalize_repo_path() {
    // Leading "./" and "/" are stripped (repeatedly for "././"), all-backslash
    // separators are converted to '/', mixed separators are left alone,
    // doubled slashes collapse, and surrounding whitespace is trimmed.
    // A trailing '/' is preserved — it is meaningful to build_path_query.
    assert_eq!(normalize_repo_path("./src/foo/"), "src/foo/");
    assert_eq!(normalize_repo_path("/src/foo/"), "src/foo/");
    assert_eq!(normalize_repo_path("././src/foo"), "src/foo");
    assert_eq!(normalize_repo_path("src\\foo\\bar.rs"), "src/foo/bar.rs");
    assert_eq!(normalize_repo_path("src/foo\\bar"), "src/foo\\bar");
    assert_eq!(normalize_repo_path("src//foo//bar/"), "src/foo/bar/");
    assert_eq!(normalize_repo_path(" src/foo/ "), "src/foo/");
    assert_eq!(normalize_repo_path("src/foo/bar.rs"), "src/foo/bar.rs");
    assert_eq!(normalize_repo_path(""), "");
}
// ─── build_path_query heuristics (no DB data) ──────────────────────────────
#[test]
fn test_trailing_slash_is_prefix() {
    let conn = setup_test_db();
    // An explicit trailing '/' always forces directory-prefix mode.
    let pq = build_path_query(&conn, "src/auth/", None).unwrap();
    assert_eq!(pq.value, "src/auth/%");
    assert!(pq.is_prefix);
}
#[test]
fn test_no_dot_in_last_segment_is_prefix() {
    let conn = setup_test_db();
    // Dotless non-root last segment with no DB evidence -> directory heuristic.
    let pq = build_path_query(&conn, "src/auth", None).unwrap();
    assert_eq!(pq.value, "src/auth/%");
    assert!(pq.is_prefix);
}
#[test]
fn test_file_extension_is_exact() {
    let conn = setup_test_db();
    // A '.' in the last segment makes the heuristic treat the input as a file.
    let pq = build_path_query(&conn, "src/auth/login.rs", None).unwrap();
    assert_eq!(pq.value, "src/auth/login.rs");
    assert!(!pq.is_prefix);
}
#[test]
fn test_root_file_is_exact() {
    let conn = setup_test_db();
    // Root-level inputs are presumed files even without DB evidence.
    let pq = build_path_query(&conn, "README.md", None).unwrap();
    assert_eq!(pq.value, "README.md");
    assert!(!pq.is_prefix);
}
#[test]
fn test_dotless_root_file_is_exact() {
    let conn = setup_test_db();
    // Root-level names resolve as exact files even without an extension.
    for name in ["Makefile", "LICENSE"] {
        let pq = build_path_query(&conn, name, None).unwrap();
        assert_eq!(pq.value, name);
        assert!(!pq.is_prefix);
    }
}
#[test]
fn test_metacharacters_escaped_in_prefix() {
    let conn = setup_test_db();
    // Prefix values feed a LIKE pattern, so '_' must come back escaped.
    let pq = build_path_query(&conn, "src/test_files/", None).unwrap();
    assert_eq!(pq.value, "src/test\\_files/%");
    assert!(pq.is_prefix);
}
#[test]
fn test_exact_value_not_escaped() {
    let conn = setup_test_db();
    // Exact values are compared with '=', so LIKE metacharacters stay literal.
    let pq = build_path_query(&conn, "README_with_underscore.md", None).unwrap();
    assert_eq!(pq.value, "README_with_underscore.md");
    assert!(!pq.is_prefix);
}
// ─── build_path_query DB probes ─────────────────────────────────────────────
#[test]
fn test_db_probe_detects_dotless_file() {
    // "src/Dockerfile" has no dot in last segment -> normally prefix.
    // DB probe detects it's actually a file.
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_diffnote(&conn, 1, 1, "src/Dockerfile");
    let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap();
    assert_eq!(pq.value, "src/Dockerfile");
    assert!(!pq.is_prefix);
    // Without DB data -> falls through to prefix (heuristic fallback)
    let empty = setup_test_db();
    let pq2 = build_path_query(&empty, "src/Dockerfile", None).unwrap();
    assert!(pq2.is_prefix);
}
#[test]
fn test_db_probe_via_file_changes() {
    // Exact match via mr_file_changes even without notes — probe 1 queries
    // both tables, so either source alone is sufficient evidence.
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_file_change(&conn, 1, 1, "src/Dockerfile");
    let pq = build_path_query(&conn, "src/Dockerfile", None).unwrap();
    assert_eq!(pq.value, "src/Dockerfile");
    assert!(!pq.is_prefix);
}
#[test]
fn test_db_probe_project_scoped() {
    // The exact-file probe honors the optional project_id scope: evidence in
    // project 1 must not make the path resolve as a file under project 2.
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_project(&conn, 2);
    seed_mr(&conn, 1, 1);
    seed_diffnote(&conn, 1, 1, "infra/Makefile");
    // Unscoped: finds it
    assert!(
        !build_path_query(&conn, "infra/Makefile", None)
            .unwrap()
            .is_prefix
    );
    // Scoped to project 1: finds it
    assert!(
        !build_path_query(&conn, "infra/Makefile", Some(1))
            .unwrap()
            .is_prefix
    );
    // Scoped to project 2: no data -> prefix
    assert!(
        build_path_query(&conn, "infra/Makefile", Some(2))
            .unwrap()
            .is_prefix
    );
}
// ─── suffix resolution ──────────────────────────────────────────────────────
#[test]
fn test_suffix_resolves_bare_filename() {
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_file_change(&conn, 1, 1, "src/auth/login.rs");
    // A bare filename with no exact/prefix hit auto-resolves via the
    // suffix probe to its single stored full path.
    let pq = build_path_query(&conn, "login.rs", None).unwrap();
    assert_eq!(pq.value, "src/auth/login.rs");
    assert!(!pq.is_prefix);
}
#[test]
fn test_suffix_resolves_partial_path() {
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_file_change(&conn, 1, 1, "src/auth/login.rs");
    // Multi-segment partial paths ("auth/login.rs") also suffix-match.
    let pq = build_path_query(&conn, "auth/login.rs", None).unwrap();
    assert_eq!(pq.value, "src/auth/login.rs");
    assert!(!pq.is_prefix);
}
#[test]
fn test_suffix_ambiguous_returns_error() {
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_file_change(&conn, 1, 1, "src/auth/utils.rs");
    seed_file_change(&conn, 1, 1, "src/db/utils.rs");
    // Two candidates -> Ambiguous error, and its message lists both full
    // paths so the user can retry with one of them.
    let err = build_path_query(&conn, "utils.rs", None).unwrap_err();
    let msg = err.to_string();
    assert!(msg.contains("src/auth/utils.rs"), "candidates: {msg}");
    assert!(msg.contains("src/db/utils.rs"), "candidates: {msg}");
}
/// The suffix probe honors the project scope: the same bare filename is
/// ambiguous globally but resolves uniquely inside either project.
#[test]
fn test_suffix_scoped_to_project() {
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_project(&conn, 2);
    seed_mr(&conn, 1, 1);
    seed_mr(&conn, 2, 2);
    seed_file_change(&conn, 1, 1, "src/utils.rs");
    seed_file_change(&conn, 2, 2, "lib/utils.rs");
    // Unscoped: ambiguous
    assert!(build_path_query(&conn, "utils.rs", None).is_err());
    // Scoped to project 1: resolves
    let pq = build_path_query(&conn, "utils.rs", Some(1)).unwrap();
    assert_eq!(pq.value, "src/utils.rs");
    // Scoped to project 2: resolves to the other candidate (previously
    // unasserted — guards against the scope filter only working one way).
    let pq = build_path_query(&conn, "utils.rs", Some(2)).unwrap();
    assert_eq!(pq.value, "lib/utils.rs");
}
#[test]
fn test_suffix_deduplicates_across_sources() {
    // Same path in notes AND file_changes -> single match, not ambiguous
    // (the probe UNIONs the sources and SELECTs DISTINCT full paths).
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_file_change(&conn, 1, 1, "src/auth/login.rs");
    seed_diffnote(&conn, 1, 1, "src/auth/login.rs");
    let pq = build_path_query(&conn, "login.rs", None).unwrap();
    assert_eq!(pq.value, "src/auth/login.rs");
    assert!(!pq.is_prefix);
}
#[test]
fn test_exact_match_preferred_over_suffix() {
    let conn = setup_test_db();
    seed_project(&conn, 1);
    seed_mr(&conn, 1, 1);
    seed_file_change(&conn, 1, 1, "README.md");
    seed_file_change(&conn, 1, 1, "docs/README.md");
    // "README.md" exists as exact match -> no ambiguity
    // (probe order is exact > prefix > suffix, so the probe that would see
    // both candidates is never reached).
    let pq = build_path_query(&conn, "README.md", None).unwrap();
    assert_eq!(pq.value, "README.md");
    assert!(!pq.is_prefix);
}

View File

@@ -1,6 +1,7 @@
use rusqlite::Connection;
use super::error::{LoreError, Result};
use super::path_resolver::escape_like;
pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
let exact = conn.query_row(
@@ -106,13 +107,6 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
/// Escape LIKE metacharacters so `%` and `_` in user input are treated as
/// literals. All queries using this must include `ESCAPE '\'`.
fn escape_like(input: &str) -> String {
input
.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
#[cfg(test)]
#[path = "project_tests.rs"]
mod tests;

View File

@@ -118,6 +118,8 @@ pub struct UnresolvedRef {
#[derive(Debug, Clone, Serialize)]
pub struct TimelineResult {
pub query: String,
/// The search mode actually used for seeding (e.g. "hybrid", "lexical", "lexical (hybrid fallback)").
pub search_mode: String,
pub events: Vec<TimelineEvent>,
/// Total events before the `--limit` was applied (for meta.total_events vs meta.showing).
#[serde(skip)]

View File

@@ -5,23 +5,28 @@ use tracing::debug;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType, resolve_entity_ref};
use crate::search::{FtsQueryMode, to_fts_query};
use crate::embedding::ollama::OllamaClient;
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
/// Result of the seed + hydrate phases.
pub struct SeedResult {
pub seed_entities: Vec<EntityRef>,
pub evidence_notes: Vec<TimelineEvent>,
/// The search mode actually used (hybrid with fallback info).
pub search_mode: String,
}
/// Run the SEED + HYDRATE phases of the timeline pipeline.
///
/// 1. SEED: FTS5 keyword search over documents -> matched document IDs
/// 1. SEED: Hybrid search (FTS + vector via RRF) over documents -> matched document IDs
/// 2. HYDRATE: Map document IDs -> source entities + top matched notes as evidence
///
/// When `client` is `None` or Ollama is unavailable, falls back to FTS-only search.
/// Discussion documents are resolved to their parent entity (issue or MR).
/// Entities are deduplicated. Evidence notes are capped at `max_evidence`.
pub fn seed_timeline(
pub async fn seed_timeline(
conn: &Connection,
client: Option<&OllamaClient>,
query: &str,
project_id: Option<i64>,
since_ms: Option<i64>,
@@ -33,57 +38,110 @@ pub fn seed_timeline(
return Ok(SeedResult {
seed_entities: Vec::new(),
evidence_notes: Vec::new(),
search_mode: "lexical".to_owned(),
});
}
let seed_entities = find_seed_entities(conn, &fts_query, project_id, since_ms, max_seeds)?;
// Use hybrid search for seed entity discovery (better recall than FTS alone).
// search_hybrid gracefully falls back to FTS-only when Ollama is unavailable.
let filters = SearchFilters {
project_id,
updated_since: since_ms,
limit: max_seeds.saturating_mul(3),
..SearchFilters::default()
};
let (hybrid_results, warnings) = search_hybrid(
conn,
client,
query,
SearchMode::Hybrid,
&filters,
FtsQueryMode::Safe,
)
.await?;
let search_mode = if warnings
.iter()
.any(|w| w.contains("falling back") || w.contains("FTS only"))
{
"lexical (hybrid fallback)".to_owned()
} else if client.is_some() && !hybrid_results.is_empty() {
"hybrid".to_owned()
} else {
"lexical".to_owned()
};
for w in &warnings {
debug!(warning = %w, "hybrid search warning during timeline seeding");
}
let seed_entities = resolve_documents_to_entities(
conn,
&hybrid_results
.iter()
.map(|r| r.document_id)
.collect::<Vec<_>>(),
max_seeds,
)?;
// Evidence notes stay FTS-only (supplementary context, not worth a second embedding call)
let evidence_notes = find_evidence_notes(conn, &fts_query, project_id, since_ms, max_evidence)?;
Ok(SeedResult {
seed_entities,
evidence_notes,
search_mode,
})
}
/// Find seed entities via FTS5 search, resolving discussions to their parent entity.
fn find_seed_entities(
/// Resolve a list of document IDs to deduplicated entity refs.
/// Discussion documents are resolved to their parent entity (issue or MR).
fn resolve_documents_to_entities(
conn: &Connection,
fts_query: &str,
project_id: Option<i64>,
since_ms: Option<i64>,
max_seeds: usize,
document_ids: &[i64],
max_entities: usize,
) -> Result<Vec<EntityRef>> {
let sql = r"
if document_ids.is_empty() {
return Ok(Vec::new());
}
let placeholders: String = document_ids
.iter()
.map(|_| "?")
.collect::<Vec<_>>()
.join(",");
let sql = format!(
r"
SELECT d.source_type, d.source_id, d.project_id,
disc.issue_id, disc.merge_request_id
FROM documents_fts
JOIN documents d ON d.id = documents_fts.rowid
FROM documents d
LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
WHERE documents_fts MATCH ?1
AND (?2 IS NULL OR d.project_id = ?2)
AND (?3 IS NULL OR d.updated_at >= ?3)
ORDER BY rank
LIMIT ?4
";
WHERE d.id IN ({placeholders})
ORDER BY CASE d.id {order_clause} END
",
order_clause = document_ids
.iter()
.enumerate()
.map(|(i, id)| format!("WHEN {id} THEN {i}"))
.collect::<Vec<_>>()
.join(" "),
);
let mut stmt = conn.prepare(sql)?;
let rows = stmt.query_map(
rusqlite::params![
fts_query,
project_id,
since_ms,
max_seeds.saturating_mul(3) as i64
],
|row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, Option<i64>>(4)?,
))
},
)?;
let mut stmt = conn.prepare(&sql)?;
let params: Vec<&dyn rusqlite::types::ToSql> = document_ids
.iter()
.map(|id| id as &dyn rusqlite::types::ToSql)
.collect();
let rows = stmt.query_map(params.as_slice(), |row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, Option<i64>>(4)?,
))
})?;
let mut seen = HashSet::new();
let mut entities = Vec::new();
@@ -116,7 +174,7 @@ fn find_seed_entities(
entities.push(entity_ref);
}
if entities.len() >= max_seeds {
if entities.len() >= max_entities {
break;
}
}

View File

@@ -85,16 +85,18 @@ fn insert_note(
conn.last_insert_rowid()
}
#[test]
fn test_seed_empty_query_returns_empty() {
#[tokio::test]
async fn test_seed_empty_query_returns_empty() {
let conn = setup_test_db();
let result = seed_timeline(&conn, "", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "", None, None, 50, 10)
.await
.unwrap();
assert!(result.seed_entities.is_empty());
assert!(result.evidence_notes.is_empty());
}
#[test]
fn test_seed_no_matches_returns_empty() {
#[tokio::test]
async fn test_seed_no_matches_returns_empty() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 1);
@@ -106,12 +108,14 @@ fn test_seed_no_matches_returns_empty() {
"unrelated content here",
);
let result = seed_timeline(&conn, "nonexistent_xyzzy_query", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "nonexistent_xyzzy_query", None, None, 50, 10)
.await
.unwrap();
assert!(result.seed_entities.is_empty());
}
#[test]
fn test_seed_finds_issue() {
#[tokio::test]
async fn test_seed_finds_issue() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 42);
@@ -123,15 +127,17 @@ fn test_seed_finds_issue() {
"authentication error in login flow",
);
let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_type, "issue");
assert_eq!(result.seed_entities[0].entity_iid, 42);
assert_eq!(result.seed_entities[0].project_path, "group/project");
}
#[test]
fn test_seed_finds_mr() {
#[tokio::test]
async fn test_seed_finds_mr() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let mr_id = insert_test_mr(&conn, project_id, 99);
@@ -143,14 +149,16 @@ fn test_seed_finds_mr() {
"fix authentication bug",
);
let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_type, "merge_request");
assert_eq!(result.seed_entities[0].entity_iid, 99);
}
#[test]
fn test_seed_deduplicates_entities() {
#[tokio::test]
async fn test_seed_deduplicates_entities() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 10);
@@ -172,14 +180,16 @@ fn test_seed_deduplicates_entities() {
"authentication error second doc",
);
let result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
// Should deduplicate: both map to the same issue
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_iid, 10);
}
#[test]
fn test_seed_resolves_discussion_to_parent() {
#[tokio::test]
async fn test_seed_resolves_discussion_to_parent() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 7);
@@ -192,14 +202,16 @@ fn test_seed_resolves_discussion_to_parent() {
"deployment pipeline failed",
);
let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_type, "issue");
assert_eq!(result.seed_entities[0].entity_iid, 7);
}
#[test]
fn test_seed_evidence_capped() {
#[tokio::test]
async fn test_seed_evidence_capped() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 1);
@@ -223,12 +235,14 @@ fn test_seed_evidence_capped() {
);
}
let result = seed_timeline(&conn, "deployment", None, None, 50, 5).unwrap();
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 5)
.await
.unwrap();
assert!(result.evidence_notes.len() <= 5);
}
#[test]
fn test_seed_evidence_snippet_truncated() {
#[tokio::test]
async fn test_seed_evidence_snippet_truncated() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 1);
@@ -244,7 +258,9 @@ fn test_seed_evidence_snippet_truncated() {
let long_body = "x".repeat(500);
insert_note(&conn, disc_id, project_id, &long_body, false);
let result = seed_timeline(&conn, "deployment", None, None, 50, 10).unwrap();
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
.await
.unwrap();
assert!(!result.evidence_notes.is_empty());
if let TimelineEventType::NoteEvidence { snippet, .. } = &result.evidence_notes[0].event_type {
assert!(snippet.chars().count() <= 200);
@@ -253,8 +269,8 @@ fn test_seed_evidence_snippet_truncated() {
}
}
#[test]
fn test_seed_respects_project_filter() {
#[tokio::test]
async fn test_seed_respects_project_filter() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
@@ -285,7 +301,17 @@ fn test_seed_respects_project_filter() {
);
// Filter to project 1 only
let result = seed_timeline(&conn, "authentication", Some(project_id), None, 50, 10).unwrap();
let result = seed_timeline(
&conn,
None,
"authentication",
Some(project_id),
None,
50,
10,
)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].project_path, "group/project");
}

View File

@@ -179,7 +179,9 @@ async fn main() {
Some(Commands::Search(args)) => {
handle_search(cli.config.as_deref(), args, robot_mode).await
}
Some(Commands::Timeline(args)) => handle_timeline(cli.config.as_deref(), args, robot_mode),
Some(Commands::Timeline(args)) => {
handle_timeline(cli.config.as_deref(), args, robot_mode).await
}
Some(Commands::Who(args)) => handle_who(cli.config.as_deref(), args, robot_mode),
Some(Commands::Drift {
entity_type,
@@ -1763,7 +1765,7 @@ async fn handle_stats(
Ok(())
}
fn handle_timeline(
async fn handle_timeline(
config_override: Option<&str>,
args: TimelineArgs,
robot_mode: bool,
@@ -1784,7 +1786,7 @@ fn handle_timeline(
max_evidence: args.max_evidence,
};
let result = run_timeline(&config, &params)?;
let result = run_timeline(&config, &params).await?;
if robot_mode {
print_timeline_json_with_meta(

View File

@@ -1,4 +1,5 @@
use crate::core::error::Result;
use crate::core::path_resolver::escape_like;
use crate::documents::SourceType;
use rusqlite::Connection;
@@ -43,12 +44,6 @@ impl SearchFilters {
}
}
fn escape_like(s: &str) -> String {
s.replace('\\', "\\\\")
.replace('%', "\\%")
.replace('_', "\\_")
}
pub fn apply_filters(
conn: &Connection,
document_ids: &[i64],

View File

@@ -40,6 +40,17 @@ fn max_chunks_per_document(conn: &Connection) -> Result<i64> {
.unwrap_or(1))
}
/// sqlite-vec hard limit for KNN `k` parameter.
const SQLITE_VEC_KNN_MAX: usize = 4_096;

/// Compute the KNN k value from the requested limit and the max chunks per
/// document. The result is guaranteed to never exceed [`SQLITE_VEC_KNN_MAX`].
///
/// `max_chunks_per_doc` may be 0 or negative in degenerate data; it is
/// clamped to at least 1 via its absolute value.
fn compute_knn_k(limit: usize, max_chunks_per_doc: i64) -> usize {
    let max_chunks = max_chunks_per_doc.unsigned_abs().max(1) as usize;
    // 1.5x headroom (+1) over the observed chunk fan-out, floored at 8 so
    // small corpora still over-fetch enough, capped at 200 for sanity.
    let multiplier = (max_chunks * 3 / 2 + 1).clamp(8, 200);
    // saturating_mul: a pathological `limit` must clamp to the sqlite-vec
    // cap rather than overflow (plain `*` would panic in debug builds).
    limit.saturating_mul(multiplier).min(SQLITE_VEC_KNN_MAX)
}
pub fn search_vector(
conn: &Connection,
query_embedding: &[f32],
@@ -55,8 +66,7 @@ pub fn search_vector(
.collect();
let max_chunks = max_chunks_per_document(conn)?.max(1);
let multiplier = ((max_chunks.unsigned_abs() as usize * 3 / 2) + 1).clamp(8, 200);
let k = (limit * multiplier).min(10_000);
let k = compute_knn_k(limit, max_chunks);
let mut stmt = conn.prepare(
"SELECT rowid, distance
@@ -124,6 +134,52 @@ mod tests {
assert_eq!(results.len(), 2);
}
#[test]
fn test_knn_k_never_exceeds_sqlite_vec_limit() {
for limit in [1, 10, 50, 100, 500, 1000, 1500, 2000, 5000] {
for max_chunks in [1, 2, 5, 10, 50, 100, 200, 500, 1000] {
let k = compute_knn_k(limit, max_chunks);
assert!(
k <= SQLITE_VEC_KNN_MAX,
"k={k} exceeded limit for limit={limit}, max_chunks={max_chunks}"
);
}
}
}
#[test]
fn test_knn_k_reproduces_original_bug_scenario() {
let k = compute_knn_k(1500, 1);
assert!(
k <= SQLITE_VEC_KNN_MAX,
"k={k} exceeded 4096 at RECALL_CAP with 1 chunk"
);
}
#[test]
fn test_knn_k_small_limit_uses_minimum_multiplier() {
let k = compute_knn_k(10, 1);
assert_eq!(k, 80);
}
#[test]
fn test_knn_k_high_chunks_caps_multiplier() {
let k = compute_knn_k(10, 200);
assert_eq!(k, 2000);
}
#[test]
fn test_knn_k_zero_max_chunks_treated_as_one() {
let k = compute_knn_k(10, 0);
assert_eq!(k, 80);
}
#[test]
fn test_knn_k_negative_max_chunks_uses_absolute() {
let k = compute_knn_k(10, -5);
assert_eq!(k, compute_knn_k(10, 5));
}
fn search_vector_dedup(rows: Vec<(i64, f64)>, limit: usize) -> Vec<VectorResult> {
let mut best: HashMap<i64, f64> = HashMap::new();
for (rowid, distance) in rows {

View File

@@ -108,8 +108,8 @@ fn insert_label_event(
/// Full pipeline: seed -> expand -> collect for a scenario with an issue
/// that has a closing MR, state changes, and label events.
#[test]
fn pipeline_seed_expand_collect_end_to_end() {
#[tokio::test]
async fn pipeline_seed_expand_collect_end_to_end() {
let conn = setup_db();
let project_id = insert_project(&conn, "group/project");
@@ -149,7 +149,9 @@ fn pipeline_seed_expand_collect_end_to_end() {
insert_label_event(&conn, project_id, Some(issue_id), "bug", 1500);
// SEED: find entities matching "authentication"
let seed_result = seed_timeline(&conn, "authentication", None, None, 50, 10).unwrap();
let seed_result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
assert!(
!seed_result.seed_entities.is_empty(),
"Seed should find at least one entity"
@@ -213,12 +215,14 @@ fn pipeline_seed_expand_collect_end_to_end() {
}
/// Verify the pipeline handles an empty FTS result gracefully.
#[test]
fn pipeline_empty_query_produces_empty_result() {
#[tokio::test]
async fn pipeline_empty_query_produces_empty_result() {
let conn = setup_db();
let _project_id = insert_project(&conn, "group/project");
let seed_result = seed_timeline(&conn, "", None, None, 50, 10).unwrap();
let seed_result = seed_timeline(&conn, None, "", None, None, 50, 10)
.await
.unwrap();
assert!(seed_result.seed_entities.is_empty());
let expand_result = expand_timeline(&conn, &seed_result.seed_entities, 1, false, 100).unwrap();
@@ -237,8 +241,8 @@ fn pipeline_empty_query_produces_empty_result() {
}
/// Verify since filter propagates through the full pipeline.
#[test]
fn pipeline_since_filter_excludes_old_events() {
#[tokio::test]
async fn pipeline_since_filter_excludes_old_events() {
let conn = setup_db();
let project_id = insert_project(&conn, "group/project");
@@ -255,7 +259,9 @@ fn pipeline_since_filter_excludes_old_events() {
insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 2000);
insert_state_event(&conn, project_id, Some(issue_id), None, "reopened", 8000);
let seed_result = seed_timeline(&conn, "deploy", None, None, 50, 10).unwrap();
let seed_result = seed_timeline(&conn, None, "deploy", None, None, 50, 10)
.await
.unwrap();
let expand_result = expand_timeline(&conn, &seed_result.seed_entities, 0, false, 100).unwrap();
// Collect with since=5000: should exclude Created(1000) and closed(2000)
@@ -274,8 +280,8 @@ fn pipeline_since_filter_excludes_old_events() {
}
/// Verify unresolved references use Option<i64> for target_iid.
#[test]
fn pipeline_unresolved_refs_have_optional_iid() {
#[tokio::test]
async fn pipeline_unresolved_refs_have_optional_iid() {
let conn = setup_db();
let project_id = insert_project(&conn, "group/project");
@@ -302,7 +308,9 @@ fn pipeline_unresolved_refs_have_optional_iid() {
)
.unwrap();
let seed_result = seed_timeline(&conn, "cross project", None, None, 50, 10).unwrap();
let seed_result = seed_timeline(&conn, None, "cross project", None, None, 50, 10)
.await
.unwrap();
let expand_result = expand_timeline(&conn, &seed_result.seed_entities, 1, false, 100).unwrap();
assert_eq!(expand_result.unresolved_references.len(), 2);