diff --git a/src/cli/autocorrect.rs b/src/cli/autocorrect.rs index 9f84d53..ab8b1ca 100644 --- a/src/cli/autocorrect.rs +++ b/src/cli/autocorrect.rs @@ -183,6 +183,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[ "--max-evidence", ], ), + ("related", &["--limit", "--project"]), ( "who", &[ diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 9efd20e..aba600f 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -11,6 +11,7 @@ pub mod ingest; pub mod init; pub mod list; pub mod me; +pub mod related; pub mod search; pub mod show; pub mod stats; @@ -48,6 +49,7 @@ pub use list::{ print_list_notes, print_list_notes_json, query_notes, run_list_issues, run_list_mrs, }; pub use me::run_me; +pub use related::{RelatedResponse, print_related_human, print_related_json, run_related}; pub use search::{ SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search, }; diff --git a/src/cli/commands/related.rs b/src/cli/commands/related.rs new file mode 100644 index 0000000..ce2daf9 --- /dev/null +++ b/src/cli/commands/related.rs @@ -0,0 +1,637 @@ +//! Semantic similarity discovery: find related entities via vector search. + +use std::collections::HashSet; + +use rusqlite::Connection; +use serde::Serialize; + +use crate::cli::render::{Icons, Theme}; +use crate::cli::robot::RobotMeta; +use crate::core::config::Config; +use crate::core::db::create_connection; +use crate::core::error::{LoreError, Result}; +use crate::core::paths::get_db_path; +use crate::core::project::resolve_project; +use crate::core::time::ms_to_iso; +use crate::embedding::ollama::{OllamaClient, OllamaConfig}; +use crate::search::search_vector; + +// --------------------------------------------------------------------------- +// Response types +// --------------------------------------------------------------------------- + +#[derive(Debug, Serialize)] +pub struct RelatedResponse { + pub mode: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub query: Option, + pub results: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub warnings: Vec, +} + +#[derive(Debug, Serialize)] +pub struct RelatedSource { + pub source_type: String, + pub iid: i64, + pub title: String, + pub project_path: String, +} + +#[derive(Debug, Serialize)] +pub struct RelatedResult { + pub source_type: String, + pub iid: i64, + pub title: String, + pub url: String, + pub similarity_score: f64, + pub project_path: String, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub shared_labels: Vec, + pub author: Option, + pub updated_at: String, +} + +// --------------------------------------------------------------------------- +// Internal row types +// --------------------------------------------------------------------------- + +struct DocumentRow { + id: i64, + source_type: String, + source_id: i64, + #[allow(dead_code)] + project_id: i64, + #[allow(dead_code)] + title: Option, + url: Option, + content_text: String, + label_names: Option, + author_username: Option, + updated_at: Option, +} + +struct EntityInfo { + #[allow(dead_code)] + iid: i64, + title: String, + project_path: String, +} + +// --------------------------------------------------------------------------- +// Main entry point +// --------------------------------------------------------------------------- + +/// Run the related command. +/// +/// Modes: +/// - Entity mode: `lore related issues 42` or `lore related mrs 99` +/// - Query mode: `lore related 'search terms'` +pub async fn run_related( + config: &Config, + query_or_type: &str, + iid: Option, + limit: usize, + project: Option<&str>, +) -> Result { + let db_path = get_db_path(config.storage.db_path.as_deref()); + let conn = create_connection(&db_path)?; + + // Check if embeddings exist + let embedding_count: i64 = conn + .query_row("SELECT COUNT(*) FROM embedding_metadata", [], |row| { + row.get(0) + }) + .unwrap_or(0); + + if embedding_count == 0 { + return Err(LoreError::Other( + "No embeddings found. Run 'lore embed' first to generate vector embeddings.".into(), + )); + } + + // Validate input + if query_or_type.trim().is_empty() { + return Err(LoreError::Other( + "Query cannot be empty. Provide an entity type (issues/mrs) and IID, or a search query.".into(), + )); + } + + // Determine mode: entity vs query + let entity_type = match query_or_type.to_lowercase().as_str() { + "issues" | "issue" | "i" => Some("issue"), + "mrs" | "mr" | "m" | "merge_request" => Some("merge_request"), + _ => None, + }; + + if let Some(etype) = entity_type { + // Entity mode + let iid = iid.ok_or_else(|| { + LoreError::Other("Entity mode requires an IID (e.g., 'lore related issues 42')".into()) + })?; + run_related_entity(&conn, config, etype, iid, limit, project).await + } else { + // Query mode - treat query_or_type as free text + run_related_query(&conn, config, query_or_type, limit, project).await + } +} + +async fn run_related_entity( + conn: &Connection, + config: &Config, + entity_type: &str, + iid: i64, + limit: usize, + project_filter: Option<&str>, +) -> Result { + // Find the source document + let source_doc = find_entity_document(conn, entity_type, iid, project_filter)?; + let source_info = get_entity_info(conn, entity_type, source_doc.source_id)?; + + // Embed the source content + let embedding = embed_text(config, &source_doc.content_text).await?; + + // Search for similar documents (limit + 1 to account for filtering self) + let vector_results = search_vector(conn, &embedding, limit.saturating_add(1))?; + + // Filter out self and hydrate results + let source_labels = parse_label_names(&source_doc.label_names); + let mut results = Vec::new(); + let mut warnings = Vec::new(); + + for vr in vector_results { + // Skip self + if vr.document_id == source_doc.id { + continue; + } + + if let Some(result) = hydrate_result(conn, vr.document_id, vr.distance, &source_labels)? { + results.push(result); + } + + if results.len() >= limit { + break; + } + } + + // Check for low similarity + if !results.is_empty() && results.iter().all(|r| r.similarity_score < 0.3) { + warnings.push("No strongly related entities found (all scores < 0.3)".to_string()); + } + + Ok(RelatedResponse { + mode: "entity".to_string(), + source: Some(RelatedSource { + source_type: entity_type.to_string(), + iid, + title: source_info.title, + project_path: source_info.project_path, + }), + query: None, + results, + warnings, + }) +} + +async fn run_related_query( + conn: &Connection, + config: &Config, + query: &str, + limit: usize, + project_filter: Option<&str>, +) -> Result { + let mut warnings = Vec::new(); + + // Warn if query is very short + if query.split_whitespace().count() <= 2 { + warnings.push("Short queries may produce noisy results".to_string()); + } + + // Embed the query + let embedding = embed_text(config, query).await?; + + // Search for similar documents (fetch extra to allow for project filtering) + let vector_results = search_vector(conn, &embedding, limit.saturating_mul(2))?; + + // Filter by project if specified and hydrate + let project_id = project_filter + .map(|p| resolve_project(conn, p)) + .transpose()?; + + let mut results = Vec::new(); + let empty_labels: HashSet = HashSet::new(); + + for vr in vector_results { + // Check project filter + if let Some(pid) = project_id { + let doc_project_id: Option = conn + .query_row( + "SELECT project_id FROM documents WHERE id = ?1", + [vr.document_id], + |row| row.get(0), + ) + .ok(); + + if doc_project_id != Some(pid) { + continue; + } + } + + if let Some(result) = hydrate_result(conn, vr.document_id, vr.distance, &empty_labels)? { + results.push(result); + } + + if results.len() >= limit { + break; + } + } + + // Check for low similarity + if !results.is_empty() && results.iter().all(|r| r.similarity_score < 0.3) { + warnings.push("No strongly related entities found (all scores < 0.3)".to_string()); + } + + Ok(RelatedResponse { + mode: "query".to_string(), + source: None, + query: Some(query.to_string()), + results, + warnings, + }) +} + +// --------------------------------------------------------------------------- +// DB helpers +// --------------------------------------------------------------------------- + +fn find_entity_document( + conn: &Connection, + entity_type: &str, + iid: i64, + project_filter: Option<&str>, +) -> Result { + let table = match entity_type { + "issue" => "issues", + "merge_request" => "merge_requests", + _ => { + return Err(LoreError::Other(format!( + "Unknown entity type: {entity_type}" + ))); + } + }; + + let (sql, params): (String, Vec>) = match project_filter { + Some(project) => { + let project_id = resolve_project(conn, project)?; + ( + format!( + "SELECT d.id, d.source_type, d.source_id, d.project_id, d.title, d.url, + d.content_text, d.label_names, d.author_username, d.updated_at + FROM documents d + JOIN {table} e ON d.source_id = e.id + WHERE d.source_type = ?1 AND e.iid = ?2 AND e.project_id = ?3" + ), + vec![ + Box::new(entity_type.to_string()), + Box::new(iid), + Box::new(project_id), + ], + ) + } + None => ( + format!( + "SELECT d.id, d.source_type, d.source_id, d.project_id, d.title, d.url, + d.content_text, d.label_names, d.author_username, d.updated_at + FROM documents d + JOIN {table} e ON d.source_id = e.id + WHERE d.source_type = ?1 AND e.iid = ?2" + ), + vec![Box::new(entity_type.to_string()), Box::new(iid)], + ), + }; + + let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + let mut stmt = conn.prepare(&sql)?; + let rows: Vec = stmt + .query_map(param_refs.as_slice(), |row| { + Ok(DocumentRow { + id: row.get(0)?, + source_type: row.get(1)?, + source_id: row.get(2)?, + project_id: row.get(3)?, + title: row.get(4)?, + url: row.get(5)?, + content_text: row.get(6)?, + label_names: row.get(7)?, + author_username: row.get(8)?, + updated_at: row.get(9)?, + }) + })? + .collect::, _>>()?; + + match rows.len() { + 0 => Err(LoreError::NotFound(format!( + "{entity_type} #{iid} not found (run 'lore sync' first?)" + ))), + 1 => Ok(rows.into_iter().next().unwrap()), + _ => Err(LoreError::Ambiguous(format!( + "{entity_type} #{iid} exists in multiple projects. Use --project to specify." + ))), + } +} + +fn get_entity_info(conn: &Connection, entity_type: &str, entity_id: i64) -> Result { + let table = match entity_type { + "issue" => "issues", + "merge_request" => "merge_requests", + _ => { + return Err(LoreError::Other(format!( + "Unknown entity type: {entity_type}" + ))); + } + }; + + let sql = format!( + "SELECT e.iid, e.title, p.path_with_namespace + FROM {table} e + JOIN projects p ON e.project_id = p.id + WHERE e.id = ?1" + ); + + conn.query_row(&sql, [entity_id], |row| { + Ok(EntityInfo { + iid: row.get(0)?, + title: row.get(1)?, + project_path: row.get(2)?, + }) + }) + .map_err(|e| LoreError::NotFound(format!("Entity not found: {e}"))) +} + +fn hydrate_result( + conn: &Connection, + document_id: i64, + distance: f64, + source_labels: &HashSet, +) -> Result> { + let doc: Option = conn + .query_row( + "SELECT d.id, d.source_type, d.source_id, d.project_id, d.title, d.url, + d.content_text, d.label_names, d.author_username, d.updated_at + FROM documents d + WHERE d.id = ?1", + [document_id], + |row| { + Ok(DocumentRow { + id: row.get(0)?, + source_type: row.get(1)?, + source_id: row.get(2)?, + project_id: row.get(3)?, + title: row.get(4)?, + url: row.get(5)?, + content_text: row.get(6)?, + label_names: row.get(7)?, + author_username: row.get(8)?, + updated_at: row.get(9)?, + }) + }, + ) + .ok(); + + let Some(doc) = doc else { + return Ok(None); + }; + + // Skip discussion/note documents - we want entities only + if doc.source_type == "discussion" || doc.source_type == "note" { + return Ok(None); + } + + // Get IID from the source entity + let table = match doc.source_type.as_str() { + "issue" => "issues", + "merge_request" => "merge_requests", + _ => return Ok(None), + }; + + // Get IID and title from the source entity - skip gracefully if not found + // (this handles orphaned documents where the entity was deleted) + let entity_info: Option<(i64, String, String)> = conn + .query_row( + &format!( + "SELECT e.iid, e.title, p.path_with_namespace + FROM {table} e + JOIN projects p ON e.project_id = p.id + WHERE e.id = ?1" + ), + [doc.source_id], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), + ) + .ok(); + + let Some((iid, title, project_path)) = entity_info else { + // Entity not found in database - skip this result + return Ok(None); + }; + + // Compute shared labels + let result_labels = parse_label_names(&doc.label_names); + let shared_labels: Vec = source_labels + .intersection(&result_labels) + .cloned() + .collect(); + + Ok(Some(RelatedResult { + source_type: doc.source_type, + iid, + title, + url: doc.url.unwrap_or_default(), + similarity_score: distance_to_similarity(distance), + project_path, + shared_labels, + author: doc.author_username, + updated_at: doc.updated_at.map(ms_to_iso).unwrap_or_default(), + })) +} + +// --------------------------------------------------------------------------- +// Embedding helper +// --------------------------------------------------------------------------- + +async fn embed_text(config: &Config, text: &str) -> Result> { + let ollama = OllamaClient::new(OllamaConfig { + base_url: config.embedding.base_url.clone(), + model: config.embedding.model.clone(), + timeout_secs: 60, + }); + + let embeddings = ollama.embed_batch(&[text]).await?; + embeddings + .into_iter() + .next() + .ok_or_else(|| LoreError::EmbeddingFailed { + document_id: 0, + reason: "No embedding returned".to_string(), + }) +} + +// --------------------------------------------------------------------------- +// Utilities +// --------------------------------------------------------------------------- + +/// Convert L2 distance to a 0-1 similarity score. +/// Uses inverse relationship: closer (lower distance) = higher similarity. +fn distance_to_similarity(distance: f64) -> f64 { + 1.0 / (1.0 + distance) +} + +fn parse_label_names(label_names_json: &Option) -> HashSet { + label_names_json + .as_deref() + .and_then(|s| serde_json::from_str::>(s).ok()) + .unwrap_or_default() + .into_iter() + .collect() +} + +// --------------------------------------------------------------------------- +// Printers +// --------------------------------------------------------------------------- + +pub fn print_related_human(response: &RelatedResponse) { + // Header + let header = match &response.source { + Some(src) => format!("Related to {} #{}: {}", src.source_type, src.iid, src.title), + None => format!( + "Related to query: \"{}\"", + response.query.as_deref().unwrap_or("") + ), + }; + println!("{}", Theme::bold().render(&header)); + println!("{}", "-".repeat(header.len().min(70))); + println!(); + + if response.results.is_empty() { + println!("No related entities found."); + return; + } + + for (i, result) in response.results.iter().enumerate() { + let type_icon = match result.source_type.as_str() { + "issue" => Icons::issue_opened(), + "merge_request" => Icons::mr_opened(), + _ => " ", + }; + + let score_bar_len = (result.similarity_score * 10.0) as usize; + let score_bar: String = "\u{2588}".repeat(score_bar_len); + + println!( + "{:>2}. {} {} #{} ({:.0}%) {}", + i + 1, + type_icon, + result.source_type, + result.iid, + result.similarity_score * 100.0, + score_bar + ); + println!(" {}", result.title); + println!( + " {} | @{}", + result.project_path, + result.author.as_deref().unwrap_or("?") + ); + + if !result.shared_labels.is_empty() { + println!(" Labels shared: {}", result.shared_labels.join(", ")); + } + println!(); + } + + // Warnings + for warning in &response.warnings { + println!("{} {}", Theme::warning().render(Icons::warning()), warning); + } +} + +pub fn print_related_json(response: &RelatedResponse, elapsed_ms: u64) { + let meta = RobotMeta { elapsed_ms }; + let output = serde_json::json!({ + "ok": true, + "data": response, + "meta": meta, + }); + match serde_json::to_string(&output) { + Ok(json) => println!("{json}"), + Err(e) => eprintln!("Error serializing to JSON: {e}"), + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_distance_to_similarity_identical() { + assert!((distance_to_similarity(0.0) - 1.0).abs() < f64::EPSILON); + } + + #[test] + fn test_distance_to_similarity_midpoint() { + assert!((distance_to_similarity(1.0) - 0.5).abs() < f64::EPSILON); + } + + #[test] + fn test_distance_to_similarity_large() { + let sim = distance_to_similarity(2.0); + assert!(sim > 0.0 && sim < 0.5); + assert!((sim - 0.333_333_333_333_333_3).abs() < 0.001); + } + + #[test] + fn test_distance_to_similarity_range() { + for d in [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0] { + let sim = distance_to_similarity(d); + assert!( + sim > 0.0 && sim <= 1.0, + "score {sim} out of range for distance {d}" + ); + } + } + + #[test] + fn test_parse_label_names_valid() { + let json = Some(r#"["bug", "priority::high"]"#.to_string()); + let labels = parse_label_names(&json); + assert!(labels.contains("bug")); + assert!(labels.contains("priority::high")); + assert_eq!(labels.len(), 2); + } + + #[test] + fn test_parse_label_names_empty() { + let labels = parse_label_names(&None); + assert!(labels.is_empty()); + } + + #[test] + fn test_parse_label_names_invalid_json() { + let json = Some("not valid json".to_string()); + let labels = parse_label_names(&json); + assert!(labels.is_empty()); + } + + #[test] + fn test_parse_label_names_empty_array() { + let json = Some("[]".to_string()); + let labels = parse_label_names(&json); + assert!(labels.is_empty()); + } +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index e3d1565..92d328d 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -293,6 +293,28 @@ pub enum Commands { project: Option, }, + /// Find semantically related entities via vector search + #[command(after_help = "\x1b[1mExamples:\x1b[0m + lore related issues 42 # Find entities related to issue #42 + lore related mrs 99 -p group/repo # Related to MR #99 in specific project + lore related 'authentication flow' # Find entities matching free text query + lore --robot related issues 42 -n 5 # JSON output, limit 5 results")] + Related { + /// Entity type (issues, mrs) or free text query + query_or_type: String, + + /// Entity IID (required when first arg is entity type) + iid: Option, + + /// Maximum results + #[arg(short = 'n', long, default_value = "10")] + limit: usize, + + /// Scope to project (fuzzy match) + #[arg(short, long)] + project: Option, + }, + /// Manage cron-based automatic syncing #[command(after_help = "\x1b[1mExamples:\x1b[0m lore cron install # Install cron job (every 8 minutes) diff --git a/src/main.rs b/src/main.rs index 2a6d29f..d5cc41b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,15 +18,16 @@ use lore::cli::commands::{ print_event_count, print_event_count_json, print_file_history, print_file_history_json, print_generate_docs, print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json, - print_list_notes, print_list_notes_json, print_search_results, print_search_results_json, - print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, - print_stats_json, print_sync, print_sync_json, print_sync_status, print_sync_status_json, - print_timeline, print_timeline_json_with_meta, print_trace, print_trace_json, print_who_human, - print_who_json, query_notes, run_auth_test, run_count, run_count_events, run_cron_install, - run_cron_status, run_cron_uninstall, run_doctor, run_drift, run_embed, run_file_history, - run_generate_docs, run_ingest, run_ingest_dry_run, run_init, run_list_issues, run_list_mrs, - run_me, run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, - run_timeline, run_token_set, run_token_show, run_who, + print_list_notes, print_list_notes_json, print_related_human, print_related_json, + print_search_results, print_search_results_json, print_show_issue, print_show_issue_json, + print_show_mr, print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json, + print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta, + print_trace, print_trace_json, print_who_human, print_who_json, query_notes, run_auth_test, + run_count, run_count_events, run_cron_install, run_cron_status, run_cron_uninstall, run_doctor, + run_drift, run_embed, run_file_history, run_generate_docs, run_ingest, run_ingest_dry_run, + run_init, run_list_issues, run_list_mrs, run_me, run_related, run_search, run_show_issue, + run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, run_token_set, run_token_show, + run_who, }; use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme}; use lore::cli::robot::{RobotMeta, strip_schemas}; @@ -225,6 +226,22 @@ async fn main() { ) .await } + Some(Commands::Related { + query_or_type, + iid, + limit, + project, + }) => { + handle_related( + cli.config.as_deref(), + &query_or_type, + iid, + limit, + project.as_deref(), + robot_mode, + ) + .await + } Some(Commands::Stats(args)) => handle_stats(cli.config.as_deref(), args, robot_mode).await, Some(Commands::Embed(args)) => handle_embed(cli.config.as_deref(), args, robot_mode).await, Some(Commands::Sync(args)) => { @@ -1996,7 +2013,7 @@ async fn handle_timeline( if robot_mode { print_timeline_json_with_meta( &result, - result.total_events_before_limit, + result.total_filtered_events, params.depth, !params.no_mentions, args.fields.as_deref(), @@ -3256,6 +3273,28 @@ async fn handle_drift( Ok(()) } +async fn handle_related( + config_override: Option<&str>, + query_or_type: &str, + iid: Option, + limit: usize, + project: Option<&str>, + robot_mode: bool, +) -> Result<(), Box> { + let start = std::time::Instant::now(); + let config = Config::load(config_override)?; + let effective_project = config.effective_project(project); + let response = run_related(&config, query_or_type, iid, limit, effective_project).await?; + let elapsed_ms = start.elapsed().as_millis() as u64; + + if robot_mode { + print_related_json(&response, elapsed_ms); + } else { + print_related_human(&response); + } + Ok(()) +} + #[allow(clippy::too_many_arguments)] async fn handle_list_compat( config_override: Option<&str>,