feat(cli): add 'lore related' semantic similarity command (bd-8con)
Adds a 'lore related' / 'lore similar' command for discovering semantically related issues and MRs using vector embeddings. Two modes are supported: entity mode (find entities similar to a specific issue/MR) and query mode (embed free text and find matching entities). Includes distance-to-similarity conversion, label intersection, human and robot output formatters, and 11 unit tests.
This commit is contained in:
@@ -129,6 +129,10 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
|
||||
"--no-dry-run",
|
||||
"--timings",
|
||||
"--tui",
|
||||
"--issue",
|
||||
"--mr",
|
||||
"--project",
|
||||
"--preflight-only",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
||||
@@ -8,11 +8,13 @@ pub mod generate_docs;
|
||||
pub mod ingest;
|
||||
pub mod init;
|
||||
pub mod list;
|
||||
pub mod related;
|
||||
pub mod search;
|
||||
pub mod show;
|
||||
pub mod stats;
|
||||
pub mod sync;
|
||||
pub mod sync_status;
|
||||
pub mod sync_surgical;
|
||||
pub mod timeline;
|
||||
pub mod trace;
|
||||
pub mod tui;
|
||||
@@ -39,6 +41,7 @@ pub use list::{
|
||||
print_list_notes, print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl,
|
||||
query_issues, query_mrs, query_notes, run_list_issues, run_list_mrs,
|
||||
};
|
||||
pub use related::{print_related, print_related_json, run_related};
|
||||
pub use search::{
|
||||
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,
|
||||
};
|
||||
@@ -49,6 +52,7 @@ pub use show::{
|
||||
pub use stats::{print_stats, print_stats_json, run_stats};
|
||||
pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync};
|
||||
pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status};
|
||||
pub use sync_surgical::run_sync_surgical;
|
||||
pub use timeline::{TimelineParams, print_timeline, print_timeline_json_with_meta, run_timeline};
|
||||
pub use trace::{parse_trace_path, print_trace, print_trace_json};
|
||||
pub use tui::{TuiArgs, find_lore_tui, run_tui};
|
||||
|
||||
692
src/cli/commands/related.rs
Normal file
692
src/cli/commands/related.rs
Normal file
@@ -0,0 +1,692 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::cli::render::{Icons, Theme};
|
||||
use crate::core::config::Config;
|
||||
use crate::core::db::create_connection;
|
||||
use crate::core::error::{LoreError, Result};
|
||||
use crate::core::paths::get_db_path;
|
||||
use crate::core::project::resolve_project;
|
||||
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
|
||||
use crate::search::search_vector;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Identifies what the related-search was anchored on: a concrete
/// issue/MR (entity mode) or a free-text query (query mode).
#[derive(Debug, Serialize)]
pub struct RelatedSource {
    // "issue", "merge_request", or "query".
    pub source_type: String,
    // IID of the source entity; None in query mode.
    pub iid: Option<i64>,
    // Title of the source entity; None in query mode.
    pub title: Option<String>,
}

/// One semantically similar entity returned by the search.
#[derive(Debug, Serialize)]
pub struct RelatedResult {
    pub source_type: String,
    pub iid: i64,
    pub title: String,
    pub url: Option<String>,
    // 0-1 score derived from the L2 distance (see distance_to_similarity).
    pub similarity_score: f64,
    // Labels shared with the source entity; always empty in query mode.
    pub shared_labels: Vec<String>,
    pub project_path: Option<String>,
}

/// Full response payload for the 'lore related' command.
#[derive(Debug, Serialize)]
pub struct RelatedResponse {
    pub source: RelatedSource,
    // The free-text query; present only in query mode.
    pub query: Option<String>,
    pub results: Vec<RelatedResult>,
    // "entity" or "query".
    pub mode: String,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pure helpers (unit-testable)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Convert an L2 distance into a similarity score in (0, 1].
///
/// Similarity decreases monotonically as distance grows; the +1 in the
/// denominator guards against division by zero at distance 0, where the
/// score is exactly 1.0.
fn distance_to_similarity(distance: f64) -> f64 {
    (1.0 + distance).recip()
}
|
||||
|
||||
/// Parse the JSON `label_names` column into a set of labels.
|
||||
fn parse_label_names(label_names_json: &Option<String>) -> HashSet<String> {
|
||||
label_names_json
|
||||
.as_deref()
|
||||
.and_then(|s| serde_json::from_str::<Vec<String>>(s).ok())
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal row types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Source document row as stored in `documents`; the anchor for
/// entity-mode similarity search.
struct DocRow {
    id: i64,
    // Full text content; embedded on-the-fly when no stored vector exists.
    content_text: String,
    // JSON array of label strings, exactly as stored in the DB.
    label_names: Option<String>,
    title: Option<String>,
}

/// A vector-search hit joined back to its source entity for display.
struct HydratedDoc {
    source_type: String,
    iid: i64,
    title: String,
    url: Option<String>,
    // JSON array of label strings; intersected with the source's labels.
    label_names: Option<String>,
    project_path: Option<String>,
}

/// (source_type, source_id, label_names, url, project_id)
type DocMetaRow = (String, i64, Option<String>, Option<String>, i64);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main entry point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub async fn run_related(
|
||||
config: &Config,
|
||||
entity_type: Option<&str>,
|
||||
entity_iid: Option<i64>,
|
||||
query_text: Option<&str>,
|
||||
project: Option<&str>,
|
||||
limit: usize,
|
||||
) -> Result<RelatedResponse> {
|
||||
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||
let conn = create_connection(&db_path)?;
|
||||
|
||||
// Check that embeddings exist at all.
|
||||
let embedding_count: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM embedding_metadata WHERE last_error IS NULL",
|
||||
[],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap_or(0);
|
||||
|
||||
if embedding_count == 0 {
|
||||
return Err(LoreError::EmbeddingsNotBuilt);
|
||||
}
|
||||
|
||||
match (entity_type, entity_iid) {
|
||||
(Some(etype), Some(iid)) => {
|
||||
run_entity_mode(config, &conn, etype, iid, project, limit).await
|
||||
}
|
||||
_ => {
|
||||
let text = query_text.unwrap_or("");
|
||||
if text.is_empty() {
|
||||
return Err(LoreError::Other(
|
||||
"Provide either an entity type + IID or a free-text query.".into(),
|
||||
));
|
||||
}
|
||||
run_query_mode(config, &conn, text, project, limit).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Entity mode: find entities similar to a specific issue/MR
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Entity mode: embed (or look up) a specific issue/MR and return up to
/// `limit` nearest neighbours, excluding the entity itself.
async fn run_entity_mode(
    config: &Config,
    conn: &rusqlite::Connection,
    entity_type: &str,
    iid: i64,
    project: Option<&str>,
    limit: usize,
) -> Result<RelatedResponse> {
    // Normalize the CLI entity-type aliases to canonical source types.
    let source_type = match entity_type {
        "issues" | "issue" => "issue",
        "mrs" | "mr" | "merge-requests" | "merge_request" => "merge_request",
        other => {
            return Err(LoreError::Other(format!(
                "Unknown entity type '{other}'. Use 'issues' or 'mrs'."
            )));
        }
    };

    // Resolve project (optional but needed for multi-project setups).
    let project_id = match project {
        Some(p) => Some(resolve_project(conn, p)?),
        None => None,
    };

    // Find the source document.
    let doc = find_entity_document(conn, source_type, iid, project_id)?;

    // Get or compute the embedding.
    let embedding = get_or_compute_embedding(config, conn, &doc).await?;

    // KNN search (request extra to filter self).
    let vector_results = search_vector(conn, &embedding, limit + 5)?;

    // Hydrate each hit and compute the label overlap with the source.
    let source_labels = parse_label_names(&doc.label_names);
    let mut results = Vec::new();

    for vr in vector_results {
        // Exclude self.
        if vr.document_id == doc.id {
            continue;
        }

        // Hits that don't resolve to a displayable entity (e.g. note
        // documents) hydrate to None and are skipped.
        if let Some(hydrated) = hydrate_document(conn, vr.document_id)? {
            let result_labels = parse_label_names(&hydrated.label_names);
            let shared: Vec<String> = source_labels
                .intersection(&result_labels)
                .cloned()
                .collect();

            results.push(RelatedResult {
                source_type: hydrated.source_type,
                iid: hydrated.iid,
                title: hydrated.title,
                url: hydrated.url,
                similarity_score: distance_to_similarity(vr.distance),
                shared_labels: shared,
                project_path: hydrated.project_path,
            });
        }

        // Stop once we have collected enough results.
        if results.len() >= limit {
            break;
        }
    }

    Ok(RelatedResponse {
        source: RelatedSource {
            source_type: source_type.to_string(),
            iid: Some(iid),
            title: doc.title,
        },
        query: None,
        results,
        mode: "entity".to_string(),
    })
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Query mode: embed free text and find similar entities
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async fn run_query_mode(
|
||||
config: &Config,
|
||||
conn: &rusqlite::Connection,
|
||||
text: &str,
|
||||
project: Option<&str>,
|
||||
limit: usize,
|
||||
) -> Result<RelatedResponse> {
|
||||
let ollama = OllamaClient::new(OllamaConfig {
|
||||
base_url: config.embedding.base_url.clone(),
|
||||
model: config.embedding.model.clone(),
|
||||
timeout_secs: 60,
|
||||
});
|
||||
|
||||
let embeddings = ollama.embed_batch(&[text]).await?;
|
||||
|
||||
let embedding = embeddings
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| LoreError::Other("Ollama returned empty embedding result.".to_string()))?;
|
||||
|
||||
let vector_results = search_vector(conn, &embedding, limit)?;
|
||||
|
||||
let _project_id = match project {
|
||||
Some(p) => Some(resolve_project(conn, p)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut results = Vec::new();
|
||||
for vr in vector_results {
|
||||
if let Some(hydrated) = hydrate_document(conn, vr.document_id)? {
|
||||
results.push(RelatedResult {
|
||||
source_type: hydrated.source_type,
|
||||
iid: hydrated.iid,
|
||||
title: hydrated.title,
|
||||
url: hydrated.url,
|
||||
similarity_score: distance_to_similarity(vr.distance),
|
||||
shared_labels: Vec::new(), // No source labels in query mode.
|
||||
project_path: hydrated.project_path,
|
||||
});
|
||||
}
|
||||
|
||||
if results.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(RelatedResponse {
|
||||
source: RelatedSource {
|
||||
source_type: "query".to_string(),
|
||||
iid: None,
|
||||
title: None,
|
||||
},
|
||||
query: Some(text.to_string()),
|
||||
results,
|
||||
mode: "query".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DB helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn find_entity_document(
|
||||
conn: &rusqlite::Connection,
|
||||
source_type: &str,
|
||||
iid: i64,
|
||||
project_id: Option<i64>,
|
||||
) -> Result<DocRow> {
|
||||
let (table, iid_col) = match source_type {
|
||||
"issue" => ("issues", "iid"),
|
||||
"merge_request" => ("merge_requests", "iid"),
|
||||
_ => {
|
||||
return Err(LoreError::Other(format!(
|
||||
"Unknown source type: {source_type}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
// We build the query dynamically because the table name differs.
|
||||
let project_filter = if project_id.is_some() {
|
||||
"AND e.project_id = ?3".to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let sql = format!(
|
||||
"SELECT d.id, d.content_text, d.label_names, d.title \
|
||||
FROM documents d \
|
||||
JOIN {table} e ON d.source_type = ?1 AND d.source_id = e.id \
|
||||
WHERE e.{iid_col} = ?2 {project_filter} \
|
||||
LIMIT 1"
|
||||
);
|
||||
|
||||
let mut stmt = conn.prepare(&sql)?;
|
||||
|
||||
let params: Vec<Box<dyn rusqlite::types::ToSql>> = if let Some(pid) = project_id {
|
||||
vec![
|
||||
Box::new(source_type.to_string()),
|
||||
Box::new(iid),
|
||||
Box::new(pid),
|
||||
]
|
||||
} else {
|
||||
vec![Box::new(source_type.to_string()), Box::new(iid)]
|
||||
};
|
||||
|
||||
let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();
|
||||
|
||||
let doc = stmt
|
||||
.query_row(param_refs.as_slice(), |row| {
|
||||
Ok(DocRow {
|
||||
id: row.get(0)?,
|
||||
content_text: row.get(1)?,
|
||||
label_names: row.get(2)?,
|
||||
title: row.get(3)?,
|
||||
})
|
||||
})
|
||||
.map_err(|_| {
|
||||
LoreError::NotFound(format!(
|
||||
"{source_type} #{iid} not found. Run 'lore sync' to fetch the latest data."
|
||||
))
|
||||
})?;
|
||||
|
||||
Ok(doc)
|
||||
}
|
||||
|
||||
/// Get the embedding for a document from the DB, or compute it on-the-fly via Ollama.
///
/// Reads chunk 0's stored vector from the `embeddings` vec0 table first;
/// if absent (or it decodes to an empty vector), falls back to embedding
/// `content_text` via Ollama.
async fn get_or_compute_embedding(
    config: &Config,
    conn: &rusqlite::Connection,
    doc: &DocRow,
) -> Result<Vec<f32>> {
    // Try to find an existing embedding in the vec0 table.
    use crate::embedding::chunk_ids::encode_rowid;

    // Rowid encodes (document id, chunk index); chunk 0 is the first chunk.
    let rowid = encode_rowid(doc.id, 0);
    let result: Option<Vec<u8>> = conn
        .query_row(
            "SELECT embedding FROM embeddings WHERE rowid = ?1",
            rusqlite::params![rowid],
            |row| row.get(0),
        )
        .ok();

    if let Some(bytes) = result {
        // Decode f32 vec from raw little-endian bytes, 4 bytes per float;
        // chunks_exact silently drops any trailing partial chunk.
        let floats: Vec<f32> = bytes
            .chunks_exact(4)
            .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
            .collect();
        if !floats.is_empty() {
            return Ok(floats);
        }
    }

    // Fallback: embed the content on-the-fly via Ollama.
    let ollama = OllamaClient::new(OllamaConfig {
        base_url: config.embedding.base_url.clone(),
        model: config.embedding.model.clone(),
        timeout_secs: 60,
    });

    let embeddings = ollama.embed_batch(&[&doc.content_text]).await?;

    embeddings
        .into_iter()
        .next()
        .ok_or_else(|| LoreError::Other("Ollama returned empty embedding result.".to_string()))
}
|
||||
|
||||
/// Hydrate a document_id into a displayable result by joining back to the source entity.
|
||||
fn hydrate_document(conn: &rusqlite::Connection, document_id: i64) -> Result<Option<HydratedDoc>> {
|
||||
// First get the document metadata.
|
||||
let doc_row: Option<DocMetaRow> = conn
|
||||
.query_row(
|
||||
"SELECT d.source_type, d.source_id, d.label_names, d.url, d.project_id \
|
||||
FROM documents d WHERE d.id = ?1",
|
||||
rusqlite::params![document_id],
|
||||
|row| {
|
||||
Ok((
|
||||
row.get(0)?,
|
||||
row.get(1)?,
|
||||
row.get(2)?,
|
||||
row.get(3)?,
|
||||
row.get(4)?,
|
||||
))
|
||||
},
|
||||
)
|
||||
.ok();
|
||||
|
||||
let Some((source_type, source_id, label_names, url, project_id)) = doc_row else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
// Get the project path.
|
||||
let project_path: Option<String> = conn
|
||||
.query_row(
|
||||
"SELECT path_with_namespace FROM projects WHERE id = ?1",
|
||||
rusqlite::params![project_id],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.ok();
|
||||
|
||||
// Get the entity IID and title from the source table.
|
||||
let (iid, title) = match source_type.as_str() {
|
||||
"issue" => {
|
||||
let row: Option<(i64, String)> = conn
|
||||
.query_row(
|
||||
"SELECT iid, title FROM issues WHERE id = ?1",
|
||||
rusqlite::params![source_id],
|
||||
|row| Ok((row.get(0)?, row.get(1)?)),
|
||||
)
|
||||
.ok();
|
||||
match row {
|
||||
Some((iid, title)) => (iid, title),
|
||||
None => return Ok(None),
|
||||
}
|
||||
}
|
||||
"merge_request" => {
|
||||
let row: Option<(i64, String)> = conn
|
||||
.query_row(
|
||||
"SELECT iid, title FROM merge_requests WHERE id = ?1",
|
||||
rusqlite::params![source_id],
|
||||
|row| Ok((row.get(0)?, row.get(1)?)),
|
||||
)
|
||||
.ok();
|
||||
match row {
|
||||
Some((iid, title)) => (iid, title),
|
||||
None => return Ok(None),
|
||||
}
|
||||
}
|
||||
// Discussion/note documents: use the document title or a placeholder.
|
||||
_ => return Ok(None), // Skip non-entity documents in results.
|
||||
};
|
||||
|
||||
Ok(Some(HydratedDoc {
|
||||
source_type,
|
||||
iid,
|
||||
title,
|
||||
url,
|
||||
label_names,
|
||||
project_path,
|
||||
}))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Human output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn print_related(response: &RelatedResponse) {
|
||||
println!();
|
||||
|
||||
match &response.source.source_type.as_str() {
|
||||
&"query" => {
|
||||
println!(
|
||||
"{}",
|
||||
Theme::bold().render(&format!(
|
||||
"Related to: \"{}\"",
|
||||
response.query.as_deref().unwrap_or("")
|
||||
))
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
let entity_label = if response.source.source_type == "issue" {
|
||||
format!("#{}", response.source.iid.unwrap_or(0))
|
||||
} else {
|
||||
format!("!{}", response.source.iid.unwrap_or(0))
|
||||
};
|
||||
println!(
|
||||
"{}",
|
||||
Theme::bold().render(&format!(
|
||||
"Related to {} {} {}",
|
||||
response.source.source_type,
|
||||
entity_label,
|
||||
response
|
||||
.source
|
||||
.title
|
||||
.as_deref()
|
||||
.map(|t| format!("\"{}\"", t))
|
||||
.unwrap_or_default()
|
||||
))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if response.results.is_empty() {
|
||||
println!(
|
||||
"\n {} {}",
|
||||
Icons::info(),
|
||||
Theme::dim().render("No related entities found.")
|
||||
);
|
||||
println!();
|
||||
return;
|
||||
}
|
||||
|
||||
println!();
|
||||
|
||||
for (i, r) in response.results.iter().enumerate() {
|
||||
let icon = if r.source_type == "issue" {
|
||||
Icons::issue_opened()
|
||||
} else {
|
||||
Icons::mr_opened()
|
||||
};
|
||||
let prefix = if r.source_type == "issue" { "#" } else { "!" };
|
||||
|
||||
let score_pct = (r.similarity_score * 100.0) as u8;
|
||||
let score_str = format!("{score_pct}%");
|
||||
|
||||
let labels_str = if r.shared_labels.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" [{}]", r.shared_labels.join(", "))
|
||||
};
|
||||
|
||||
let project_str = r
|
||||
.project_path
|
||||
.as_deref()
|
||||
.map(|p| format!(" ({})", p))
|
||||
.unwrap_or_default();
|
||||
|
||||
println!(
|
||||
" {:>2}. {} {}{:<5} {} {}{}{}",
|
||||
i + 1,
|
||||
icon,
|
||||
prefix,
|
||||
r.iid,
|
||||
Theme::accent().render(&score_str),
|
||||
r.title,
|
||||
Theme::dim().render(&labels_str),
|
||||
Theme::dim().render(&project_str),
|
||||
);
|
||||
}
|
||||
|
||||
println!();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Robot (JSON) output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn print_related_json(response: &RelatedResponse, elapsed_ms: u64) {
|
||||
let output = serde_json::json!({
|
||||
"ok": true,
|
||||
"data": {
|
||||
"source": response.source,
|
||||
"query": response.query,
|
||||
"mode": response.mode,
|
||||
"results": response.results,
|
||||
},
|
||||
"meta": {
|
||||
"elapsed_ms": elapsed_ms,
|
||||
"mode": response.mode,
|
||||
"embedding_dims": 768,
|
||||
"distance_metric": "l2",
|
||||
}
|
||||
});
|
||||
|
||||
println!("{}", serde_json::to_string(&output).unwrap_or_default());
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for the pure helpers: distance-to-similarity conversion
    //! and label-name parsing / intersection. No DB or network required.
    use super::*;

    #[test]
    fn test_distance_to_similarity_identical() {
        let sim = distance_to_similarity(0.0);
        assert!(
            (sim - 1.0).abs() < f64::EPSILON,
            "distance 0 should give similarity 1.0"
        );
    }

    #[test]
    fn test_distance_to_similarity_one() {
        let sim = distance_to_similarity(1.0);
        assert!(
            (sim - 0.5).abs() < f64::EPSILON,
            "distance 1 should give similarity 0.5"
        );
    }

    #[test]
    fn test_distance_to_similarity_large() {
        let sim = distance_to_similarity(100.0);
        assert!(
            sim > 0.0 && sim < 0.02,
            "large distance should give near-zero similarity"
        );
    }

    // Scores must stay inside [0, 1] across the whole distance range.
    #[test]
    fn test_distance_to_similarity_range() {
        for d in [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0] {
            let sim = distance_to_similarity(d);
            assert!(
                (0.0..=1.0).contains(&sim),
                "similarity {sim} out of [0, 1] range for distance {d}"
            );
        }
    }

    // Similarity must be non-increasing as distance grows.
    #[test]
    fn test_distance_to_similarity_monotonic() {
        let distances = [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0];
        for w in distances.windows(2) {
            let s1 = distance_to_similarity(w[0]);
            let s2 = distance_to_similarity(w[1]);
            assert!(
                s1 >= s2,
                "similarity should decrease with distance: d={} s={} vs d={} s={}",
                w[0],
                s1,
                w[1],
                s2
            );
        }
    }

    #[test]
    fn test_parse_label_names_valid_json() {
        let json = Some(r#"["bug","frontend","urgent"]"#.to_string());
        let labels = parse_label_names(&json);
        assert_eq!(labels.len(), 3);
        assert!(labels.contains("bug"));
        assert!(labels.contains("frontend"));
        assert!(labels.contains("urgent"));
    }

    #[test]
    fn test_parse_label_names_null() {
        let labels = parse_label_names(&None);
        assert!(labels.is_empty());
    }

    // Malformed JSON degrades to an empty set rather than an error.
    #[test]
    fn test_parse_label_names_invalid_json() {
        let json = Some("not valid json".to_string());
        let labels = parse_label_names(&json);
        assert!(labels.is_empty());
    }

    #[test]
    fn test_parse_label_names_empty_array() {
        let json = Some("[]".to_string());
        let labels = parse_label_names(&json);
        assert!(labels.is_empty());
    }

    // Mirrors the intersection logic used when building shared_labels.
    #[test]
    fn test_shared_labels_intersection() {
        let a = Some(r#"["bug","frontend","urgent"]"#.to_string());
        let b = Some(r#"["bug","backend","urgent","perf"]"#.to_string());
        let labels_a = parse_label_names(&a);
        let labels_b = parse_label_names(&b);
        let shared: HashSet<String> = labels_a.intersection(&labels_b).cloned().collect();
        assert_eq!(shared.len(), 2);
        assert!(shared.contains("bug"));
        assert!(shared.contains("urgent"));
    }

    #[test]
    fn test_shared_labels_no_overlap() {
        let a = Some(r#"["bug"]"#.to_string());
        let b = Some(r#"["feature"]"#.to_string());
        let labels_a = parse_label_names(&a);
        let labels_b = parse_label_names(&b);
        let shared: HashSet<String> = labels_a.intersection(&labels_b).cloned().collect();
        assert!(shared.is_empty());
    }
}
|
||||
@@ -26,6 +26,35 @@ pub struct SyncOptions {
|
||||
pub no_events: bool,
|
||||
pub robot_mode: bool,
|
||||
pub dry_run: bool,
|
||||
pub issue_iids: Vec<u64>,
|
||||
pub mr_iids: Vec<u64>,
|
||||
pub project: Option<String>,
|
||||
pub preflight_only: bool,
|
||||
}
|
||||
|
||||
impl SyncOptions {
    /// Upper bound on the number of IIDs accepted by a surgical sync.
    pub const MAX_SURGICAL_TARGETS: usize = 100;

    /// True when specific issue/MR IIDs were requested, which routes the
    /// sync to the surgical (by-IID) pipeline instead of a full sync.
    pub fn is_surgical(&self) -> bool {
        !self.issue_iids.is_empty() || !self.mr_iids.is_empty()
    }
}
|
||||
|
||||
/// The issue/MR IIDs targeted by a surgical sync, echoed back in results.
#[derive(Debug, Default, Serialize)]
pub struct SurgicalIids {
    pub issues: Vec<u64>,
    pub merge_requests: Vec<u64>,
}
|
||||
|
||||
/// Per-entity outcome of a surgical sync.
#[derive(Debug, Serialize)]
pub struct EntitySyncResult {
    // "issue" or "merge_request".
    pub entity_type: String,
    pub iid: u64,
    // Outcome tag, e.g. "synced", "not_found", "skipped_toctou".
    pub outcome: String,
    // Error detail; omitted from JSON when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    // Why the TOCTOU check skipped this entity; omitted from JSON when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub toctou_reason: Option<String>,
}
|
||||
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
@@ -49,6 +78,14 @@ pub struct SyncResult {
|
||||
pub issue_projects: Vec<ProjectSummary>,
|
||||
#[serde(skip)]
|
||||
pub mr_projects: Vec<ProjectSummary>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub surgical_mode: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub surgical_iids: Option<SurgicalIids>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub entity_results: Option<Vec<EntitySyncResult>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub preflight_only: Option<bool>,
|
||||
}
|
||||
|
||||
/// Apply semantic color to a stage-completion icon glyph.
|
||||
@@ -66,6 +103,11 @@ pub async fn run_sync(
|
||||
run_id: Option<&str>,
|
||||
signal: &ShutdownSignal,
|
||||
) -> Result<SyncResult> {
|
||||
// Surgical dispatch: if any IIDs specified, route to the surgical pipeline.
|
||||
if options.is_surgical() {
|
||||
return super::sync_surgical::run_sync_surgical(config, options, run_id, signal).await;
|
||||
}
|
||||
|
||||
let generated_id;
|
||||
let run_id = match run_id {
|
||||
Some(id) => id,
|
||||
@@ -1029,4 +1071,93 @@ mod tests {
|
||||
assert!(rows[0].contains("0 statuses updated"));
|
||||
assert!(rows[0].contains("skipped (disabled)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sync_result_default_omits_surgical_fields() {
|
||||
let result = SyncResult::default();
|
||||
let json = serde_json::to_value(&result).unwrap();
|
||||
assert!(json.get("surgical_mode").is_none());
|
||||
assert!(json.get("surgical_iids").is_none());
|
||||
assert!(json.get("entity_results").is_none());
|
||||
assert!(json.get("preflight_only").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sync_result_with_surgical_fields_serializes_correctly() {
|
||||
let result = SyncResult {
|
||||
surgical_mode: Some(true),
|
||||
surgical_iids: Some(SurgicalIids {
|
||||
issues: vec![7, 42],
|
||||
merge_requests: vec![10],
|
||||
}),
|
||||
entity_results: Some(vec![
|
||||
EntitySyncResult {
|
||||
entity_type: "issue".to_string(),
|
||||
iid: 7,
|
||||
outcome: "synced".to_string(),
|
||||
error: None,
|
||||
toctou_reason: None,
|
||||
},
|
||||
EntitySyncResult {
|
||||
entity_type: "issue".to_string(),
|
||||
iid: 42,
|
||||
outcome: "skipped_toctou".to_string(),
|
||||
error: None,
|
||||
toctou_reason: Some("updated_at changed".to_string()),
|
||||
},
|
||||
]),
|
||||
preflight_only: Some(false),
|
||||
..SyncResult::default()
|
||||
};
|
||||
let json = serde_json::to_value(&result).unwrap();
|
||||
assert_eq!(json["surgical_mode"], true);
|
||||
assert_eq!(json["surgical_iids"]["issues"], serde_json::json!([7, 42]));
|
||||
assert_eq!(json["entity_results"].as_array().unwrap().len(), 2);
|
||||
assert_eq!(json["entity_results"][1]["outcome"], "skipped_toctou");
|
||||
assert_eq!(json["preflight_only"], false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn entity_sync_result_omits_none_fields() {
|
||||
let entity = EntitySyncResult {
|
||||
entity_type: "merge_request".to_string(),
|
||||
iid: 10,
|
||||
outcome: "synced".to_string(),
|
||||
error: None,
|
||||
toctou_reason: None,
|
||||
};
|
||||
let json = serde_json::to_value(&entity).unwrap();
|
||||
assert!(json.get("error").is_none());
|
||||
assert!(json.get("toctou_reason").is_none());
|
||||
assert!(json.get("entity_type").is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_surgical_with_issues() {
|
||||
let opts = SyncOptions {
|
||||
issue_iids: vec![1],
|
||||
..SyncOptions::default()
|
||||
};
|
||||
assert!(opts.is_surgical());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_surgical_with_mrs() {
|
||||
let opts = SyncOptions {
|
||||
mr_iids: vec![10],
|
||||
..SyncOptions::default()
|
||||
};
|
||||
assert!(opts.is_surgical());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_surgical_empty() {
|
||||
let opts = SyncOptions::default();
|
||||
assert!(!opts.is_surgical());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_surgical_targets_is_100() {
|
||||
assert_eq!(SyncOptions::MAX_SURGICAL_TARGETS, 100);
|
||||
}
|
||||
}
|
||||
|
||||
462
src/cli/commands/sync_surgical.rs
Normal file
462
src/cli/commands/sync_surgical.rs
Normal file
@@ -0,0 +1,462 @@
|
||||
//! Surgical (by-IID) sync orchestration.
|
||||
//!
|
||||
//! Coordinates the full pipeline for syncing specific issues/MRs by IID:
|
||||
//! resolve project → preflight fetch → ingest with TOCTOU → enrichment →
|
||||
//! scoped doc regeneration → embedding.
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::Config;
|
||||
use crate::cli::commands::embed::run_embed;
|
||||
use crate::core::db::create_connection;
|
||||
use crate::core::error::{LoreError, Result};
|
||||
use crate::core::lock::{AppLock, LockOptions};
|
||||
use crate::core::metrics::StageTiming;
|
||||
use crate::core::paths::get_db_path;
|
||||
use crate::core::project::resolve_project;
|
||||
use crate::core::shutdown::ShutdownSignal;
|
||||
use crate::core::sync_run::SyncRunRecorder;
|
||||
use crate::documents::{SourceType, regenerate_documents_for_sources};
|
||||
use crate::gitlab::GitLabClient;
|
||||
use crate::ingestion::surgical::{
|
||||
SurgicalTarget, enrich_entity_resource_events, enrich_mr_closes_issues, enrich_mr_file_changes,
|
||||
ingest_issue_by_iid, ingest_mr_by_iid, preflight_fetch,
|
||||
};
|
||||
|
||||
use super::sync::{EntitySyncResult, SurgicalIids, SyncOptions, SyncResult};
|
||||
|
||||
/// Build a `StageTiming` entry for a surgical-sync stage, zeroing the
/// counters this pipeline does not track (skips, rate limits, retries).
fn timing(name: &str, elapsed_ms: u64, items: usize, errors: usize) -> StageTiming {
    StageTiming {
        name: name.to_string(),
        project: None,
        elapsed_ms,
        items_processed: items,
        items_skipped: 0,
        errors,
        rate_limit_hits: 0,
        retries: 0,
        sub_stages: vec![],
    }
}
|
||||
|
||||
/// Run the surgical sync pipeline for specific IIDs within a single project.
///
/// Unlike [`super::sync::run_sync`], this targets specific issues/MRs by IID
/// rather than paginating all entities across all projects.
///
/// Pipeline stages (each appended to the recorder's stage timings):
/// 1. Preflight fetch — resolve every requested IID against the GitLab API.
/// 2. Ingest — upsert the fetched issues/MRs (TOCTOU-guarded via `updated_at`).
/// 3. Enrichment — resource events, MR `closes_issues`, MR file changes.
/// 4. Scoped doc regeneration — only for source keys dirtied by ingest.
/// 5. Embedding — non-fatal; the embedding backend may be unavailable.
///
/// Returns `Ok(SyncResult::default())` immediately when `options` carries no
/// surgical IIDs. `signal` is checked between stages; cancellation returns a
/// partial-but-valid `SyncResult` after marking the recorder cancelled.
///
/// # Errors
/// Fails if `--project` is missing, the GitLab token env var is unset, the
/// project cannot be resolved, the app lock cannot be acquired, or a
/// DB/recorder operation fails. Per-entity fetch/ingest failures are NOT
/// errors — they are reported inside `entity_results`.
pub async fn run_sync_surgical(
    config: &Config,
    options: SyncOptions,
    run_id: Option<&str>,
    signal: &ShutdownSignal,
) -> Result<SyncResult> {
    // ── Validate inputs ──
    // Not a surgical invocation (no --issue/--mr IIDs): nothing to do.
    if !options.is_surgical() {
        return Ok(SyncResult::default());
    }

    let project_str = options.project.as_deref().ok_or_else(|| {
        LoreError::Other("Surgical sync requires --project (-p) to identify the target".into())
    })?;

    // ── Run ID ──
    // Caller-supplied ID wins; otherwise take the first 8 hex chars of a fresh
    // UUIDv4. `generated_id` is declared outside the match so the borrowed
    // slice can outlive the `None` arm.
    let generated_id;
    let run_id = match run_id {
        Some(id) => id,
        None => {
            generated_id = uuid::Uuid::new_v4().simple().to_string();
            &generated_id[..8]
        }
    };

    // ── DB connections ──
    // Three independent connections: main ingest work, run-recorder
    // bookkeeping, and the app lock (which takes ownership of its connection).
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;
    let recorder_conn = create_connection(&db_path)?;
    let lock_conn = create_connection(&db_path)?;

    // ── Resolve project ──
    // Fuzzy project string -> local row id, then pull the GitLab numeric id
    // and full path needed for API calls.
    let project_id = resolve_project(&conn, project_str)?;
    let (gitlab_project_id, project_path): (i64, String) = conn.query_row(
        "SELECT gitlab_project_id, path_with_namespace FROM projects WHERE id = ?1",
        [project_id],
        |row| Ok((row.get(0)?, row.get(1)?)),
    )?;

    // ── Build surgical targets ──
    let mut targets = Vec::new();
    for &iid in &options.issue_iids {
        targets.push(SurgicalTarget::Issue { iid });
    }
    for &iid in &options.mr_iids {
        targets.push(SurgicalTarget::MergeRequest { iid });
    }

    // ── Prepare result ──
    let mut result = SyncResult {
        run_id: run_id.to_string(),
        surgical_mode: Some(true),
        surgical_iids: Some(SurgicalIids {
            issues: options.issue_iids.clone(),
            merge_requests: options.mr_iids.clone(),
        }),
        ..SyncResult::default()
    };
    let mut entity_results: Vec<EntitySyncResult> = Vec::new();
    let mut stage_timings: Vec<StageTiming> = Vec::new();

    // ── Start recorder ──
    let recorder = SyncRunRecorder::start(&recorder_conn, "surgical-sync", run_id)?;
    let iids_json = serde_json::to_string(&result.surgical_iids).unwrap_or_default();
    recorder.set_surgical_metadata(&recorder_conn, "surgical", "preflight", &iids_json)?;

    // ── GitLab client ──
    let token =
        std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
            env_var: config.gitlab.token_env_var.clone(),
        })?;
    let client = GitLabClient::new(
        &config.gitlab.base_url,
        &token,
        Some(config.sync.requests_per_second),
    );

    // ── Stage: Preflight fetch ──
    let preflight_start = Instant::now();
    debug!(%run_id, "Surgical sync: preflight fetch");
    recorder.update_phase(&recorder_conn, "preflight")?;

    let preflight = preflight_fetch(&client, gitlab_project_id, &project_path, &targets).await?;

    // Targets GitLab could not resolve are reported as "not_found" entity
    // results rather than failing the run.
    for failure in &preflight.failures {
        entity_results.push(EntitySyncResult {
            entity_type: failure.target.entity_type().to_string(),
            iid: failure.target.iid(),
            outcome: "not_found".to_string(),
            error: Some(failure.error.to_string()),
            toctou_reason: None,
        });
    }

    stage_timings.push(timing(
        "preflight",
        preflight_start.elapsed().as_millis() as u64,
        preflight.issues.len() + preflight.merge_requests.len(),
        preflight.failures.len(),
    ));

    // ── Preflight-only mode ──
    // Validation-only run: report what was resolvable and return before any
    // DB writes. The recorder still records the run as succeeded.
    if options.preflight_only {
        result.preflight_only = Some(true);
        result.entity_results = Some(entity_results);
        recorder.succeed(&recorder_conn, &stage_timings, 0, preflight.failures.len())?;
        return Ok(result);
    }

    // ── Cancellation check ──
    if signal.is_cancelled() {
        result.entity_results = Some(entity_results);
        recorder.cancel(&recorder_conn, "Cancelled before ingest")?;
        return Ok(result);
    }

    // ── Acquire lock ──
    // Taken only now, after the read-only preflight, so validation runs don't
    // contend with other sync runs; everything below writes to the DB.
    let mut lock = AppLock::new(
        lock_conn,
        LockOptions {
            name: "sync".to_string(),
            stale_lock_minutes: config.sync.stale_lock_minutes,
            heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds,
        },
    );
    lock.acquire(options.force)?;

    // ── Stage: Ingest ──
    let ingest_start = Instant::now();
    debug!(%run_id, "Surgical sync: ingesting entities");
    recorder.update_phase(&recorder_conn, "ingest")?;

    // Source keys touched by ingest; drives the scoped doc-regeneration stage.
    let mut dirty_sources: Vec<(SourceType, i64)> = Vec::new();

    // Ingest issues
    for issue in &preflight.issues {
        match ingest_issue_by_iid(&conn, config, project_id, issue) {
            Ok(ir) => {
                if ir.skipped_stale {
                    // TOCTOU guard hit: the DB copy is at least as new as what
                    // preflight fetched, so the write was skipped.
                    entity_results.push(EntitySyncResult {
                        entity_type: "issue".to_string(),
                        iid: issue.iid as u64,
                        outcome: "skipped_stale".to_string(),
                        error: None,
                        toctou_reason: Some("DB has same or newer updated_at".to_string()),
                    });
                    recorder.record_entity_result(&recorder_conn, "issue", "skipped_stale")?;
                } else {
                    dirty_sources.extend(ir.dirty_source_keys);
                    result.issues_updated += 1;
                    entity_results.push(EntitySyncResult {
                        entity_type: "issue".to_string(),
                        iid: issue.iid as u64,
                        outcome: "ingested".to_string(),
                        error: None,
                        toctou_reason: None,
                    });
                    recorder.record_entity_result(&recorder_conn, "issue", "ingested")?;
                }
            }
            Err(e) => {
                // Per-entity ingest failures are non-fatal; the run continues.
                // NOTE(review): "error" outcomes are not forwarded to
                // recorder.record_entity_result, unlike the two success paths —
                // confirm this asymmetry is intended.
                warn!(iid = issue.iid, error = %e, "Surgical issue ingest failed");
                entity_results.push(EntitySyncResult {
                    entity_type: "issue".to_string(),
                    iid: issue.iid as u64,
                    outcome: "error".to_string(),
                    error: Some(e.to_string()),
                    toctou_reason: None,
                });
            }
        }
    }

    // Ingest MRs (same outcome handling as the issue loop above)
    for mr in &preflight.merge_requests {
        match ingest_mr_by_iid(&conn, config, project_id, mr) {
            Ok(mr_result) => {
                if mr_result.skipped_stale {
                    entity_results.push(EntitySyncResult {
                        entity_type: "merge_request".to_string(),
                        iid: mr.iid as u64,
                        outcome: "skipped_stale".to_string(),
                        error: None,
                        toctou_reason: Some("DB has same or newer updated_at".to_string()),
                    });
                    recorder.record_entity_result(&recorder_conn, "mr", "skipped_stale")?;
                } else {
                    dirty_sources.extend(mr_result.dirty_source_keys);
                    result.mrs_updated += 1;
                    entity_results.push(EntitySyncResult {
                        entity_type: "merge_request".to_string(),
                        iid: mr.iid as u64,
                        outcome: "ingested".to_string(),
                        error: None,
                        toctou_reason: None,
                    });
                    recorder.record_entity_result(&recorder_conn, "mr", "ingested")?;
                }
            }
            Err(e) => {
                warn!(iid = mr.iid, error = %e, "Surgical MR ingest failed");
                entity_results.push(EntitySyncResult {
                    entity_type: "merge_request".to_string(),
                    iid: mr.iid as u64,
                    outcome: "error".to_string(),
                    error: Some(e.to_string()),
                    toctou_reason: None,
                });
            }
        }
    }

    stage_timings.push(timing(
        "ingest",
        ingest_start.elapsed().as_millis() as u64,
        result.issues_updated + result.mrs_updated,
        // NOTE(review): hard-coded 0 — per-entity ingest errors are captured
        // in entity_results but never counted into this stage timing; confirm
        // this is intentional.
        0,
    ));

    // ── Stage: Enrichment ──
    if signal.is_cancelled() {
        result.entity_results = Some(entity_results);
        lock.release();
        recorder.cancel(&recorder_conn, "Cancelled before enrichment")?;
        return Ok(result);
    }

    let enrich_start = Instant::now();
    debug!(%run_id, "Surgical sync: enriching dependents");
    recorder.update_phase(&recorder_conn, "enrichment")?;

    // Enrich issues: resource events
    if !options.no_events {
        for issue in &preflight.issues {
            // Look up the local row id; an entity that errored or was never
            // ingested has no row, so its enrichment is skipped silently.
            let local_id = match conn.query_row(
                "SELECT id FROM issues WHERE project_id = ? AND iid = ?",
                (project_id, issue.iid),
                |row| row.get::<_, i64>(0),
            ) {
                Ok(id) => id,
                Err(_) => continue,
            };

            if let Err(e) = enrich_entity_resource_events(
                &client,
                &conn,
                project_id,
                gitlab_project_id,
                "issue",
                issue.iid,
                local_id,
            )
            .await
            {
                warn!(iid = issue.iid, error = %e, "Failed to enrich issue resource events");
                result.resource_events_failed += 1;
            } else {
                result.resource_events_fetched += 1;
            }
        }
    }

    // Enrich MRs: resource events, closes_issues, file changes
    for mr in &preflight.merge_requests {
        let local_mr_id = match conn.query_row(
            "SELECT id FROM merge_requests WHERE project_id = ? AND iid = ?",
            (project_id, mr.iid),
            |row| row.get::<_, i64>(0),
        ) {
            Ok(id) => id,
            Err(_) => continue,
        };

        if !options.no_events {
            if let Err(e) = enrich_entity_resource_events(
                &client,
                &conn,
                project_id,
                gitlab_project_id,
                "merge_request",
                mr.iid,
                local_mr_id,
            )
            .await
            {
                warn!(iid = mr.iid, error = %e, "Failed to enrich MR resource events");
                result.resource_events_failed += 1;
            } else {
                result.resource_events_fetched += 1;
            }
        }

        // closes_issues failures are logged only — no counter tracks them.
        if let Err(e) = enrich_mr_closes_issues(
            &client,
            &conn,
            project_id,
            gitlab_project_id,
            mr.iid,
            local_mr_id,
        )
        .await
        {
            warn!(iid = mr.iid, error = %e, "Failed to enrich MR closes_issues");
        }

        if let Err(e) = enrich_mr_file_changes(
            &client,
            &conn,
            project_id,
            gitlab_project_id,
            mr.iid,
            local_mr_id,
        )
        .await
        {
            warn!(iid = mr.iid, error = %e, "Failed to enrich MR file changes");
            result.mr_diffs_failed += 1;
        } else {
            result.mr_diffs_fetched += 1;
        }
    }

    stage_timings.push(timing(
        "enrichment",
        enrich_start.elapsed().as_millis() as u64,
        result.resource_events_fetched + result.mr_diffs_fetched,
        result.resource_events_failed + result.mr_diffs_failed,
    ));

    // ── Stage: Scoped doc regeneration ──
    // Only runs when docs aren't disabled AND ingest actually dirtied a
    // source; a run where everything was skipped_stale regenerates nothing.
    if !options.no_docs && !dirty_sources.is_empty() {
        if signal.is_cancelled() {
            result.entity_results = Some(entity_results);
            lock.release();
            recorder.cancel(&recorder_conn, "Cancelled before doc generation")?;
            return Ok(result);
        }

        let docs_start = Instant::now();
        debug!(%run_id, count = dirty_sources.len(), "Surgical sync: regenerating docs");
        recorder.update_phase(&recorder_conn, "docs")?;

        match regenerate_documents_for_sources(&conn, &dirty_sources) {
            Ok(docs_result) => {
                result.documents_regenerated = docs_result.regenerated;
                result.documents_errored = docs_result.errored;
            }
            Err(e) => {
                // Non-fatal: stale docs are tolerated; counters stay at 0.
                warn!(error = %e, "Surgical doc regeneration failed");
            }
        }

        stage_timings.push(timing(
            "docs",
            docs_start.elapsed().as_millis() as u64,
            result.documents_regenerated,
            result.documents_errored,
        ));
    }

    // ── Stage: Embedding ──
    if !options.no_embed {
        if signal.is_cancelled() {
            result.entity_results = Some(entity_results);
            lock.release();
            recorder.cancel(&recorder_conn, "Cancelled before embedding")?;
            return Ok(result);
        }

        let embed_start = Instant::now();
        debug!(%run_id, "Surgical sync: embedding");
        recorder.update_phase(&recorder_conn, "embed")?;

        // NOTE(review): run_embed is given the whole config, not the dirty
        // set — presumably it embeds all pending docs rather than only this
        // run's; confirm against run_embed's contract.
        match run_embed(config, false, false, None, signal).await {
            Ok(embed_result) => {
                result.documents_embedded = embed_result.docs_embedded;
                result.embedding_failed = embed_result.failed;
            }
            Err(e) => {
                // Embedding failure is non-fatal (Ollama may be unavailable)
                warn!(error = %e, "Surgical embedding failed (non-fatal)");
            }
        }

        stage_timings.push(timing(
            "embed",
            embed_start.elapsed().as_millis() as u64,
            result.documents_embedded,
            result.embedding_failed,
        ));
    }

    // ── Finalize ──
    lock.release();
    result.entity_results = Some(entity_results);

    let total_items = result.issues_updated + result.mrs_updated;
    let total_errors =
        result.resource_events_failed + result.mr_diffs_failed + result.documents_errored;
    recorder.succeed(&recorder_conn, &stage_timings, total_items, total_errors)?;

    debug!(
        %run_id,
        issues = result.issues_updated,
        mrs = result.mrs_updated,
        docs = result.documents_regenerated,
        "Surgical sync complete"
    );

    Ok(result)
}

#[cfg(test)]
#[path = "sync_surgical_tests.rs"]
mod tests;
|
||||
323
src/cli/commands/sync_surgical_tests.rs
Normal file
323
src/cli/commands/sync_surgical_tests.rs
Normal file
@@ -0,0 +1,323 @@
|
||||
//! Tests for `sync_surgical.rs` — surgical sync orchestration.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use wiremock::matchers::{method, path, path_regex};
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
use crate::cli::commands::sync::SyncOptions;
|
||||
use crate::cli::commands::sync_surgical::run_sync_surgical;
|
||||
use crate::core::config::{Config, GitLabConfig, ProjectConfig};
|
||||
use crate::core::db::{create_connection, run_migrations};
|
||||
use crate::core::shutdown::ShutdownSignal;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn setup_temp_db() -> (tempfile::NamedTempFile, rusqlite::Connection) {
|
||||
let tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
let conn = create_connection(tmp.path()).unwrap();
|
||||
run_migrations(&conn).unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
|
||||
VALUES (1, 42, 'group/repo', 'https://gitlab.example.com/group/repo')",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
(tmp, conn)
|
||||
}
|
||||
|
||||
fn test_config(base_url: &str, db_path: &Path) -> Config {
|
||||
Config {
|
||||
gitlab: GitLabConfig {
|
||||
base_url: base_url.to_string(),
|
||||
token_env_var: "LORE_TEST_TOKEN".to_string(),
|
||||
},
|
||||
projects: vec![ProjectConfig {
|
||||
path: "group/repo".to_string(),
|
||||
}],
|
||||
default_project: None,
|
||||
sync: crate::core::config::SyncConfig {
|
||||
requests_per_second: 1000.0,
|
||||
stale_lock_minutes: 30,
|
||||
heartbeat_interval_seconds: 10,
|
||||
..Default::default()
|
||||
},
|
||||
storage: crate::core::config::StorageConfig {
|
||||
db_path: Some(db_path.to_string_lossy().to_string()),
|
||||
backup_dir: None,
|
||||
compress_raw_payloads: false,
|
||||
},
|
||||
embedding: Default::default(),
|
||||
logging: Default::default(),
|
||||
scoring: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// GitLab REST payload for a single open issue in `group/repo`, shaped like
/// the preflight fetch response.
fn issue_json(iid: i64) -> serde_json::Value {
    let gitlab_id = 1000 + iid;
    let title = format!("Test issue #{iid}");
    let web_url = format!("https://gitlab.example.com/group/repo/-/issues/{iid}");
    serde_json::json!({
        "id": gitlab_id,
        "iid": iid,
        "project_id": 42,
        "title": title,
        "description": "desc",
        "state": "opened",
        "created_at": "2026-02-17T10:00:00.000+00:00",
        "updated_at": "2026-02-17T12:00:00.000+00:00",
        "closed_at": null,
        "author": { "id": 1, "username": "alice", "name": "Alice" },
        "assignees": [],
        "labels": ["bug"],
        "milestone": null,
        "due_date": null,
        "web_url": web_url
    })
}
|
||||
|
||||
#[allow(dead_code)] // Used by MR integration tests added later
|
||||
fn mr_json(iid: i64) -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"id": 2000 + iid,
|
||||
"iid": iid,
|
||||
"project_id": 42,
|
||||
"title": format!("Test MR !{iid}"),
|
||||
"description": "desc",
|
||||
"state": "opened",
|
||||
"draft": false,
|
||||
"work_in_progress": false,
|
||||
"source_branch": "feat",
|
||||
"target_branch": "main",
|
||||
"sha": "abc123",
|
||||
"references": { "short": format!("!{iid}"), "full": format!("group/repo!{iid}") },
|
||||
"detailed_merge_status": "mergeable",
|
||||
"created_at": "2026-02-17T10:00:00.000+00:00",
|
||||
"updated_at": "2026-02-17T12:00:00.000+00:00",
|
||||
"merged_at": null,
|
||||
"closed_at": null,
|
||||
"author": { "id": 2, "username": "bob", "name": "Bob" },
|
||||
"merge_user": null,
|
||||
"merged_by": null,
|
||||
"labels": [],
|
||||
"assignees": [],
|
||||
"reviewers": [],
|
||||
"web_url": format!("https://gitlab.example.com/group/repo/-/merge_requests/{iid}"),
|
||||
"merge_commit_sha": null,
|
||||
"squash_commit_sha": null
|
||||
})
|
||||
}
|
||||
|
||||
/// Mount all enrichment endpoint mocks (resource events, closes_issues, diffs) as empty.
|
||||
async fn mount_empty_enrichment_mocks(server: &MockServer) {
|
||||
// Resource events for issues
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/issues/\d+/resource_state_events",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/issues/\d+/resource_label_events",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/issues/\d+/resource_milestone_events",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
|
||||
// Resource events for MRs
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/merge_requests/\d+/resource_state_events",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/merge_requests/\d+/resource_label_events",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/merge_requests/\d+/resource_milestone_events",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
|
||||
// Closes issues
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(
|
||||
r"/api/v4/projects/\d+/merge_requests/\d+/closes_issues",
|
||||
))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
|
||||
// Diffs
|
||||
Mock::given(method("GET"))
|
||||
.and(path_regex(r"/api/v4/projects/\d+/merge_requests/\d+/diffs"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(server)
|
||||
.await;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn ingest_one_issue_updates_result() {
|
||||
let server = MockServer::start().await;
|
||||
let (tmp, _conn) = setup_temp_db();
|
||||
|
||||
// Set token env var
|
||||
// SAFETY: Tests are single-threaded within each test function.
|
||||
unsafe { std::env::set_var("LORE_TEST_TOKEN", "test-token") };
|
||||
|
||||
// Mock preflight issue fetch
|
||||
Mock::given(method("GET"))
|
||||
.and(path("/api/v4/projects/42/issues/7"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(issue_json(7)))
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
mount_empty_enrichment_mocks(&server).await;
|
||||
|
||||
let config = test_config(&server.uri(), tmp.path());
|
||||
let options = SyncOptions {
|
||||
robot_mode: true,
|
||||
issue_iids: vec![7],
|
||||
project: Some("group/repo".to_string()),
|
||||
no_embed: true, // skip embed (no Ollama in tests)
|
||||
..SyncOptions::default()
|
||||
};
|
||||
let signal = ShutdownSignal::new();
|
||||
let result = run_sync_surgical(&config, options, Some("test01"), &signal)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.surgical_mode, Some(true));
|
||||
assert_eq!(result.issues_updated, 1);
|
||||
assert!(result.entity_results.is_some());
|
||||
let entities = result.entity_results.unwrap();
|
||||
assert_eq!(entities.len(), 1);
|
||||
assert_eq!(entities[0].outcome, "ingested");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn preflight_only_returns_early() {
|
||||
let server = MockServer::start().await;
|
||||
let (tmp, _conn) = setup_temp_db();
|
||||
|
||||
// SAFETY: Tests are single-threaded within each test function.
|
||||
unsafe { std::env::set_var("LORE_TEST_TOKEN", "test-token") };
|
||||
|
||||
Mock::given(method("GET"))
|
||||
.and(path("/api/v4/projects/42/issues/7"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(issue_json(7)))
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let config = test_config(&server.uri(), tmp.path());
|
||||
let options = SyncOptions {
|
||||
robot_mode: true,
|
||||
issue_iids: vec![7],
|
||||
project: Some("group/repo".to_string()),
|
||||
preflight_only: true,
|
||||
..SyncOptions::default()
|
||||
};
|
||||
let signal = ShutdownSignal::new();
|
||||
let result = run_sync_surgical(&config, options, Some("test02"), &signal)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.preflight_only, Some(true));
|
||||
assert_eq!(result.issues_updated, 0); // No actual ingest
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn cancellation_before_ingest_cancels_recorder() {
|
||||
let server = MockServer::start().await;
|
||||
let (tmp, _conn) = setup_temp_db();
|
||||
|
||||
// SAFETY: Tests are single-threaded within each test function.
|
||||
unsafe { std::env::set_var("LORE_TEST_TOKEN", "test-token") };
|
||||
|
||||
Mock::given(method("GET"))
|
||||
.and(path("/api/v4/projects/42/issues/7"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(issue_json(7)))
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let config = test_config(&server.uri(), tmp.path());
|
||||
let options = SyncOptions {
|
||||
robot_mode: true,
|
||||
issue_iids: vec![7],
|
||||
project: Some("group/repo".to_string()),
|
||||
..SyncOptions::default()
|
||||
};
|
||||
let signal = ShutdownSignal::new();
|
||||
signal.cancel(); // Cancel before we start
|
||||
let result = run_sync_surgical(&config, options, Some("test03"), &signal)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.issues_updated, 0);
|
||||
}
|
||||
|
||||
fn dummy_config() -> Config {
|
||||
Config {
|
||||
gitlab: GitLabConfig {
|
||||
base_url: "https://unused.example.com".to_string(),
|
||||
token_env_var: "LORE_TEST_TOKEN".to_string(),
|
||||
},
|
||||
projects: vec![],
|
||||
default_project: None,
|
||||
sync: Default::default(),
|
||||
storage: Default::default(),
|
||||
embedding: Default::default(),
|
||||
logging: Default::default(),
|
||||
scoring: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn missing_project_returns_error() {
|
||||
let options = SyncOptions {
|
||||
issue_iids: vec![7],
|
||||
project: None, // Missing!
|
||||
..SyncOptions::default()
|
||||
};
|
||||
let config = dummy_config();
|
||||
let signal = ShutdownSignal::new();
|
||||
let err = run_sync_surgical(&config, options, Some("test04"), &signal)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
assert!(err.to_string().contains("--project"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn empty_iids_returns_default_result() {
|
||||
let config = dummy_config();
|
||||
let options = SyncOptions::default(); // No IIDs
|
||||
let signal = ShutdownSignal::new();
|
||||
let result = run_sync_surgical(&config, options, None, &signal)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.issues_updated, 0);
|
||||
assert_eq!(result.mrs_updated, 0);
|
||||
assert!(result.surgical_mode.is_none()); // Not surgical mode
|
||||
}
|
||||
@@ -246,6 +246,10 @@ pub enum Commands {
|
||||
/// Launch the interactive TUI dashboard
|
||||
Tui(TuiArgs),
|
||||
|
||||
/// Find semantically related entities via vector similarity
|
||||
#[command(visible_alias = "similar")]
|
||||
Related(RelatedArgs),
|
||||
|
||||
/// Detect discussion divergence from original intent
|
||||
Drift {
|
||||
/// Entity type (currently only "issues" supported)
|
||||
@@ -814,6 +818,22 @@ pub struct SyncArgs {
|
||||
/// Show sync progress in interactive TUI
|
||||
#[arg(long)]
|
||||
pub tui: bool,
|
||||
|
||||
/// Surgically sync specific issues by IID (repeatable)
|
||||
#[arg(long, value_parser = clap::value_parser!(u64).range(1..))]
|
||||
pub issue: Vec<u64>,
|
||||
|
||||
/// Surgically sync specific merge requests by IID (repeatable)
|
||||
#[arg(long, value_parser = clap::value_parser!(u64).range(1..))]
|
||||
pub mr: Vec<u64>,
|
||||
|
||||
/// Scope to a single project (required for surgical sync if no defaultProject)
|
||||
#[arg(short = 'p', long)]
|
||||
pub project: Option<String>,
|
||||
|
||||
/// Run preflight validation only (no DB writes). Requires --issue or --mr.
|
||||
#[arg(long)]
|
||||
pub preflight_only: bool,
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -1054,6 +1074,32 @@ pub struct TraceArgs {
|
||||
pub limit: usize,
|
||||
}
|
||||
|
||||
// Arguments for `lore related` (visible alias: `lore similar`).
//
// Dual-mode positionals: `query_or_type` is either an entity-type keyword
// ("issues"/"mrs", paired with `iid`) or a free-text concept query.
// NOTE(review): the mode dispatch itself presumably happens in the command
// handler, not in this struct — confirm there. The `///` doc comments below
// are clap help text and must not be reworded casually.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore related issues 42 # Find issues similar to #42
lore related mrs 99 -p group/repo # MRs similar to !99
lore related 'authentication timeout' # Concept search")]
pub struct RelatedArgs {
    /// Entity type ('issues' or 'mrs') OR free-text query
    pub query_or_type: String,

    /// Entity IID (when first arg is entity type)
    pub iid: Option<i64>,

    /// Maximum results
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "10",
        help_heading = "Output"
    )]
    pub limit: usize,

    /// Scope to project (fuzzy match)
    #[arg(short = 'p', long, help_heading = "Filters")]
    pub project: Option<String>,
}
|
||||
|
||||
#[derive(Parser)]
|
||||
pub struct CountArgs {
|
||||
/// Entity type to count (issues, mrs, discussions, notes, events)
|
||||
|
||||
Reference in New Issue
Block a user