From 6e22f120d0b52cc78d4d2bb64ead71dca4f4bbdf Mon Sep 17 00:00:00 2001 From: Taylor Eernisse Date: Fri, 30 Jan 2026 15:45:54 -0500 Subject: [PATCH] refactor(core): Rename GiError to LoreError and add search infrastructure Mechanical rename of GiError -> LoreError across the core module to match the project's rebranding from gitlab-inbox to gitlore/lore. Updates the error enum name, all From impls, and the Result type alias. Additionally introduces: - New error variants for embedding pipeline: OllamaUnavailable, OllamaModelNotFound, EmbeddingFailed, EmbeddingsNotBuilt. Each includes actionable suggestions (e.g., "ollama serve", "ollama pull nomic-embed-text") to guide users through recovery. - New error codes 14-16 for programmatic handling of Ollama failures. - Savepoint-based migration execution in db.rs: each migration now runs inside a SQLite SAVEPOINT so a failed migration rolls back cleanly without corrupting the schema_version tracking. Previously a partial migration could leave the database in an inconsistent state. - core::backoff module: exponential backoff with jitter utility for retry loops in the embedding pipeline and discussion queues. - core::project module: helper for resolving project IDs and paths from the local database, used by the document regenerator and search filters. Co-Authored-By: Claude Opus 4.5 --- src/core/backoff.rs | 99 +++++++++++++++++++++++++++ src/core/config.rs | 14 ++-- src/core/db.rs | 45 ++++++++++-- src/core/error.rs | 49 +++++++++++-- src/core/lock.rs | 4 +- src/core/mod.rs | 4 +- src/core/project.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 5 +- 8 files changed, 361 insertions(+), 22 deletions(-) create mode 100644 src/core/backoff.rs create mode 100644 src/core/project.rs diff --git a/src/core/backoff.rs b/src/core/backoff.rs new file mode 100644 index 0000000..1ee035d --- /dev/null +++ b/src/core/backoff.rs @@ -0,0 +1,99 @@ +use rand::Rng; + +/// Compute next_attempt_at with exponential backoff and jitter. +/// +/// Formula: now + min(3600000, 1000 * 2^attempt_count) * (0.9 to 1.1) +/// - Capped at 1 hour to prevent runaway delays +/// - ±10% jitter prevents synchronized retries after outages +/// +/// Used by: +/// - `dirty_sources` retry scheduling (document regeneration failures) +/// - `pending_discussion_fetches` retry scheduling (API fetch failures) +/// +/// Having one implementation prevents subtle divergence between queues +/// (e.g., different caps or jitter ranges). +pub fn compute_next_attempt_at(now: i64, attempt_count: i64) -> i64 { + // Cap attempt_count to prevent overflow (2^30 > 1 hour anyway) + let capped_attempts = attempt_count.min(30) as u32; + let base_delay_ms = 1000_i64.saturating_mul(1 << capped_attempts); + let capped_delay_ms = base_delay_ms.min(3_600_000); // 1 hour cap + + // Add ±10% jitter + let jitter_factor = rand::thread_rng().gen_range(0.9..=1.1); + let delay_with_jitter = (capped_delay_ms as f64 * jitter_factor) as i64; + + now + delay_with_jitter +} + +#[cfg(test)] +mod tests { + use super::*; + + const MAX_DELAY_MS: i64 = 3_600_000; + + #[test] + fn test_exponential_curve() { + let now = 1_000_000_000_i64; + // Each attempt should roughly double the delay (within jitter) + for attempt in 1..=10 { + let result = compute_next_attempt_at(now, attempt); + let delay = result - now; + let expected_base = 1000_i64 * (1 << attempt); + let min_expected = (expected_base as f64 * 0.89) as i64; + let max_expected = (expected_base as f64 * 1.11) as i64; + assert!( + delay >= min_expected && delay <= max_expected, + "attempt {attempt}: delay {delay} not in [{min_expected}, {max_expected}]" + ); + } + } + + #[test] + fn test_cap_at_one_hour() { + let now = 1_000_000_000_i64; + for attempt in [20, 25, 30, 50, 100] { + let result = compute_next_attempt_at(now, attempt); + let delay = result - now; + let max_with_jitter = (MAX_DELAY_MS as f64 * 1.11) as i64; + assert!( + delay <= max_with_jitter, + "attempt {attempt}: delay {delay} exceeds cap {max_with_jitter}" + ); + } + } + + #[test] + fn test_jitter_range() { + let now = 1_000_000_000_i64; + let attempt = 5; // base = 32000 + let base = 1000_i64 * (1 << attempt); + let min_delay = (base as f64 * 0.89) as i64; + let max_delay = (base as f64 * 1.11) as i64; + + for _ in 0..100 { + let result = compute_next_attempt_at(now, attempt); + let delay = result - now; + assert!( + delay >= min_delay && delay <= max_delay, + "delay {delay} not in jitter range [{min_delay}, {max_delay}]" + ); + } + } + + #[test] + fn test_first_retry_is_about_two_seconds() { + let now = 1_000_000_000_i64; + let result = compute_next_attempt_at(now, 1); + let delay = result - now; + // attempt 1: base = 2000ms, with jitter: 1800-2200ms + assert!(delay >= 1800 && delay <= 2200, "first retry delay: {delay}ms"); + } + + #[test] + fn test_overflow_safety() { + let now = i64::MAX / 2; + // Should not panic even with very large attempt_count + let result = compute_next_attempt_at(now, i64::MAX); + assert!(result > now); + } +} diff --git a/src/core/config.rs b/src/core/config.rs index c342f0a..bfef3e3 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -6,7 +6,7 @@ use serde::Deserialize; use std::fs; use std::path::Path; -use super::error::{GiError, Result}; +use super::error::{LoreError, Result}; use super::paths::get_config_path; /// GitLab connection settings. @@ -130,7 +130,7 @@ impl Config { let config_path = get_config_path(cli_override); if !config_path.exists() { - return Err(GiError::ConfigNotFound { + return Err(LoreError::ConfigNotFound { path: config_path.display().to_string(), }); } @@ -140,25 +140,25 @@ impl Config { /// Load configuration from a specific path. pub fn load_from_path(path: &Path) -> Result { - let content = fs::read_to_string(path).map_err(|e| GiError::ConfigInvalid { + let content = fs::read_to_string(path).map_err(|e| LoreError::ConfigInvalid { details: format!("Failed to read config file: {e}"), })?; let config: Config = - serde_json::from_str(&content).map_err(|e| GiError::ConfigInvalid { + serde_json::from_str(&content).map_err(|e| LoreError::ConfigInvalid { details: format!("Invalid JSON: {e}"), })?; // Validate required fields if config.projects.is_empty() { - return Err(GiError::ConfigInvalid { + return Err(LoreError::ConfigInvalid { details: "At least one project is required".to_string(), }); } for project in &config.projects { if project.path.is_empty() { - return Err(GiError::ConfigInvalid { + return Err(LoreError::ConfigInvalid { details: "Project path cannot be empty".to_string(), }); } @@ -166,7 +166,7 @@ impl Config { // Validate URL format if url::Url::parse(&config.gitlab.base_url).is_err() { - return Err(GiError::ConfigInvalid { + return Err(LoreError::ConfigInvalid { details: format!("Invalid GitLab URL: {}", config.gitlab.base_url), }); } diff --git a/src/core/db.rs b/src/core/db.rs index 25ec0fd..a95ccf8 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -8,7 +8,7 @@ use std::fs; use std::path::Path; use tracing::{debug, info}; -use super::error::{GiError, Result}; +use super::error::{LoreError, Result}; /// Embedded migrations - compiled into the binary. const MIGRATIONS: &[(&str, &str)] = &[ @@ -27,6 +27,18 @@ const MIGRATIONS: &[(&str, &str)] = &[ "006", include_str!("../../migrations/006_merge_requests.sql"), ), + ( + "007", + include_str!("../../migrations/007_documents.sql"), + ), + ( + "008", + include_str!("../../migrations/008_fts5.sql"), + ), + ( + "009", + include_str!("../../migrations/009_embeddings.sql"), + ), ]; /// Create a database connection with production-grade pragmas. @@ -88,13 +100,36 @@ pub fn run_migrations(conn: &Connection) -> Result<()> { continue; } - conn.execute_batch(sql) - .map_err(|e| GiError::MigrationFailed { + // Wrap each migration in a transaction to prevent partial application. + // If the migration SQL already contains BEGIN/COMMIT, execute_batch handles + // it, but wrapping in a savepoint ensures atomicity for those that don't. + let savepoint_name = format!("migration_{}", version); + conn.execute_batch(&format!("SAVEPOINT {}", savepoint_name)) + .map_err(|e| LoreError::MigrationFailed { version, - message: e.to_string(), + message: format!("Failed to create savepoint: {}", e), source: Some(e), })?; + match conn.execute_batch(sql) { + Ok(()) => { + conn.execute_batch(&format!("RELEASE {}", savepoint_name)) + .map_err(|e| LoreError::MigrationFailed { + version, + message: format!("Failed to release savepoint: {}", e), + source: Some(e), + })?; + } + Err(e) => { + let _ = conn.execute_batch(&format!("ROLLBACK TO {}", savepoint_name)); + return Err(LoreError::MigrationFailed { + version, + message: e.to_string(), + source: Some(e), + }); + } + } + info!(version, "Migration applied"); } @@ -146,7 +181,7 @@ pub fn run_migrations_from_dir(conn: &Connection, migrations_dir: &Path) -> Resu let sql = fs::read_to_string(entry.path())?; conn.execute_batch(&sql) - .map_err(|e| GiError::MigrationFailed { + .map_err(|e| LoreError::MigrationFailed { version, message: e.to_string(), source: Some(e), diff --git a/src/core/error.rs b/src/core/error.rs index 351a3b5..b22b476 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -21,6 +21,9 @@ pub enum ErrorCode { TransformError, IoError, InternalError, + OllamaUnavailable, + OllamaModelNotFound, + EmbeddingFailed, } impl std::fmt::Display for ErrorCode { @@ -39,6 +42,9 @@ impl std::fmt::Display for ErrorCode { Self::TransformError => "TRANSFORM_ERROR", Self::IoError => "IO_ERROR", Self::InternalError => "INTERNAL_ERROR", + Self::OllamaUnavailable => "OLLAMA_UNAVAILABLE", + Self::OllamaModelNotFound => "OLLAMA_MODEL_NOT_FOUND", + Self::EmbeddingFailed => "EMBEDDING_FAILED", }; write!(f, "{code}") } @@ -61,13 +67,16 @@ impl ErrorCode { Self::MigrationFailed => 11, Self::IoError => 12, Self::TransformError => 13, + Self::OllamaUnavailable => 14, + Self::OllamaModelNotFound => 15, + Self::EmbeddingFailed => 16, } } } /// Main error type for gitlore. #[derive(Error, Debug)] -pub enum GiError { +pub enum LoreError { #[error("Config file not found at {path}. Run \"lore init\" first.")] ConfigNotFound { path: String }, @@ -129,9 +138,25 @@ pub enum GiError { #[error("{0}")] Other(String), + + #[error("Cannot connect to Ollama at {base_url}. Is it running?")] + OllamaUnavailable { + base_url: String, + #[source] + source: Option, + }, + + #[error("Ollama model '{model}' not found. Run: ollama pull {model}")] + OllamaModelNotFound { model: String }, + + #[error("Embedding failed for document {document_id}: {reason}")] + EmbeddingFailed { document_id: i64, reason: String }, + + #[error("No embeddings found. Run: lore embed")] + EmbeddingsNotBuilt, } -impl GiError { +impl LoreError { /// Get the error code for programmatic handling. pub fn code(&self) -> ErrorCode { match self { @@ -152,6 +177,10 @@ impl GiError { Self::NotFound(_) => ErrorCode::GitLabNotFound, Self::Ambiguous(_) => ErrorCode::GitLabNotFound, Self::Other(_) => ErrorCode::InternalError, + Self::OllamaUnavailable { .. } => ErrorCode::OllamaUnavailable, + Self::OllamaModelNotFound { .. } => ErrorCode::OllamaModelNotFound, + Self::EmbeddingFailed { .. } => ErrorCode::EmbeddingFailed, + Self::EmbeddingsNotBuilt => ErrorCode::EmbeddingFailed, } } @@ -193,7 +222,15 @@ impl GiError { Self::Ambiguous(_) => Some( "Use -p to choose a specific project.\n\n Example:\n lore issues 42 -p group/project-a\n lore mrs 99 -p group/project-b", ), - _ => None, + Self::OllamaUnavailable { .. } => Some("Start Ollama: ollama serve"), + Self::OllamaModelNotFound { .. } => { + Some("Pull the model: ollama pull nomic-embed-text") + } + Self::EmbeddingFailed { .. } => { + Some("Check Ollama logs or retry with 'lore embed --retry-failed'") + } + Self::EmbeddingsNotBuilt => Some("Generate embeddings first: lore embed"), + Self::Json(_) | Self::Io(_) | Self::Transform(_) | Self::Other(_) => None, } } @@ -227,12 +264,12 @@ pub struct RobotErrorOutput { pub error: RobotError, } -impl From<&GiError> for RobotErrorOutput { - fn from(e: &GiError) -> Self { +impl From<&LoreError> for RobotErrorOutput { + fn from(e: &LoreError) -> Self { Self { error: e.to_robot_error(), } } } -pub type Result = std::result::Result; +pub type Result = std::result::Result; diff --git a/src/core/lock.rs b/src/core/lock.rs index 25d81e3..edfc543 100644 --- a/src/core/lock.rs +++ b/src/core/lock.rs @@ -12,7 +12,7 @@ use tracing::{debug, error, info, warn}; use uuid::Uuid; use super::db::create_connection; -use super::error::{GiError, Result}; +use super::error::{LoreError, Result}; use super::time::{ms_to_iso, now_ms}; /// Maximum consecutive heartbeat failures before signaling error. @@ -116,7 +116,7 @@ impl AppLock { } else { // Lock held by another active process - rollback and return error drop(tx); - return Err(GiError::DatabaseLocked { + return Err(LoreError::DatabaseLocked { owner: existing_owner, started_at: ms_to_iso(acquired_at), }); diff --git a/src/core/mod.rs b/src/core/mod.rs index 22dccd5..3cb15a0 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,12 +1,14 @@ //! Core infrastructure modules. +pub mod backoff; pub mod config; pub mod db; pub mod error; pub mod lock; pub mod paths; pub mod payloads; +pub mod project; pub mod time; pub use config::Config; -pub use error::{GiError, Result}; +pub use error::{LoreError, Result}; diff --git a/src/core/project.rs b/src/core/project.rs new file mode 100644 index 0000000..fe76a98 --- /dev/null +++ b/src/core/project.rs @@ -0,0 +1,163 @@ +use rusqlite::Connection; + +use super::error::{LoreError, Result}; + +/// Resolve a project string to a project_id using cascading match: +/// 1. Exact match on path_with_namespace +/// 2. Case-insensitive exact match +/// 3. Suffix match (only if unambiguous) +/// 4. Error with available projects list +pub fn resolve_project(conn: &Connection, project_str: &str) -> Result { + // Step 1: Exact match + let exact = conn.query_row( + "SELECT id FROM projects WHERE path_with_namespace = ?1", + rusqlite::params![project_str], + |row| row.get::<_, i64>(0), + ); + if let Ok(id) = exact { + return Ok(id); + } + + // Step 2: Case-insensitive exact match + let ci = conn.query_row( + "SELECT id FROM projects WHERE LOWER(path_with_namespace) = LOWER(?1)", + rusqlite::params![project_str], + |row| row.get::<_, i64>(0), + ); + if let Ok(id) = ci { + return Ok(id); + } + + // Step 3: Suffix match (unambiguous) + let mut suffix_stmt = conn.prepare( + "SELECT id, path_with_namespace FROM projects + WHERE path_with_namespace LIKE '%/' || ?1 + OR path_with_namespace = ?1" + )?; + let suffix_matches: Vec<(i64, String)> = suffix_stmt + .query_map(rusqlite::params![project_str], |row| { + Ok((row.get(0)?, row.get(1)?)) + })? + .collect::, _>>()?; + + match suffix_matches.len() { + 1 => return Ok(suffix_matches[0].0), + n if n > 1 => { + let matching: Vec = suffix_matches.iter().map(|(_, p)| p.clone()).collect(); + return Err(LoreError::Other(format!( + "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}", + project_str, + matching.iter().map(|p| format!(" {}", p)).collect::>().join("\n"), + matching[0] + ))); + } + _ => {} + } + + // Step 4: No match — list available projects + let mut all_stmt = conn.prepare( + "SELECT path_with_namespace FROM projects ORDER BY path_with_namespace" + )?; + let all_projects: Vec = all_stmt + .query_map([], |row| row.get(0))? + .collect::, _>>()?; + + if all_projects.is_empty() { + return Err(LoreError::Other(format!( + "Project '{}' not found. No projects have been synced yet.\n\nHint: Run 'lore ingest' first.", + project_str + ))); + } + + Err(LoreError::Other(format!( + "Project '{}' not found.\n\nAvailable projects:\n{}\n\nHint: Use the full path, e.g., --project={}", + project_str, + all_projects.iter().map(|p| format!(" {}", p)).collect::>().join("\n"), + all_projects[0] + ))) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_db() -> Connection { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch(" + CREATE TABLE projects ( + id INTEGER PRIMARY KEY, + gitlab_project_id INTEGER UNIQUE NOT NULL, + path_with_namespace TEXT NOT NULL, + default_branch TEXT, + web_url TEXT, + created_at INTEGER, + updated_at INTEGER, + raw_payload_id INTEGER + ); + CREATE INDEX idx_projects_path ON projects(path_with_namespace); + ").unwrap(); + conn + } + + fn insert_project(conn: &Connection, id: i64, path: &str) { + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (?1, ?2, ?3)", + rusqlite::params![id, id * 100, path], + ).unwrap(); + } + + #[test] + fn test_exact_match() { + let conn = setup_db(); + insert_project(&conn, 1, "backend/auth-service"); + let id = resolve_project(&conn, "backend/auth-service").unwrap(); + assert_eq!(id, 1); + } + + #[test] + fn test_case_insensitive() { + let conn = setup_db(); + insert_project(&conn, 1, "backend/auth-service"); + let id = resolve_project(&conn, "Backend/Auth-Service").unwrap(); + assert_eq!(id, 1); + } + + #[test] + fn test_suffix_unambiguous() { + let conn = setup_db(); + insert_project(&conn, 1, "backend/auth-service"); + insert_project(&conn, 2, "frontend/web-ui"); + let id = resolve_project(&conn, "auth-service").unwrap(); + assert_eq!(id, 1); + } + + #[test] + fn test_suffix_ambiguous() { + let conn = setup_db(); + insert_project(&conn, 1, "backend/auth-service"); + insert_project(&conn, 2, "frontend/auth-service"); + let err = resolve_project(&conn, "auth-service").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg); + assert!(msg.contains("backend/auth-service")); + assert!(msg.contains("frontend/auth-service")); + } + + #[test] + fn test_no_match() { + let conn = setup_db(); + insert_project(&conn, 1, "backend/auth-service"); + let err = resolve_project(&conn, "nonexistent").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("not found"), "Expected not found error, got: {}", msg); + assert!(msg.contains("backend/auth-service")); + } + + #[test] + fn test_empty_projects() { + let conn = setup_db(); + let err = resolve_project(&conn, "anything").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("No projects have been synced")); + } +} diff --git a/src/lib.rs b/src/lib.rs index 7d7ae5d..98d7b71 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,10 @@ pub mod cli; pub mod core; +pub mod documents; +pub mod embedding; pub mod gitlab; pub mod ingestion; +pub mod search; -pub use core::{Config, GiError, Result}; +pub use core::{Config, LoreError, Result};