Adds the 'lore related' / 'lore similar' command for discovering semantically related issues and MRs using vector embeddings. Two modes: (1) Entity mode: find entities similar to a specific issue/MR; (2) Query mode: embed free text and find matching entities. Includes distance-to-similarity conversion, label intersection, human and robot output formatters, and 11 unit tests.
144 lines
4.5 KiB
Rust
144 lines
4.5 KiB
Rust
use rusqlite::Connection;
|
|
|
|
use super::error::Result;
|
|
use super::metrics::StageTiming;
|
|
use super::time::now_ms;
|
|
|
|
/// Handle to a single row of the `sync_runs` table.
///
/// A recorder is created by `SyncRunRecorder::start`, which inserts the row,
/// and is then used to update that row as the run progresses and finally to
/// mark it succeeded, failed, or cancelled.
///
/// The type is intentionally not `Clone`: the finalizing methods take `self`
/// by value so a finished run cannot be updated again through the same handle.
#[derive(Debug)]
pub struct SyncRunRecorder {
    /// Value of the `id` column of the `sync_runs` row this recorder updates.
    row_id: i64,
}
|
|
|
|
impl SyncRunRecorder {
|
|
pub fn start(conn: &Connection, command: &str, run_id: &str) -> Result<Self> {
|
|
let now = now_ms();
|
|
conn.execute(
|
|
"INSERT INTO sync_runs (started_at, heartbeat_at, status, command, run_id)
|
|
VALUES (?1, ?2, 'running', ?3, ?4)",
|
|
rusqlite::params![now, now, command, run_id],
|
|
)?;
|
|
let row_id = conn.last_insert_rowid();
|
|
Ok(Self { row_id })
|
|
}
|
|
|
|
/// Returns the database row ID for this sync run.
|
|
pub fn row_id(&self) -> i64 {
|
|
self.row_id
|
|
}
|
|
|
|
/// Set surgical-specific metadata after `start()`.
|
|
///
|
|
/// Takes `&self` so the recorder can continue to be used for phase
|
|
/// updates and entity result recording before finalization.
|
|
pub fn set_surgical_metadata(
|
|
&self,
|
|
conn: &Connection,
|
|
mode: &str,
|
|
phase: &str,
|
|
iids_json: &str,
|
|
) -> Result<()> {
|
|
conn.execute(
|
|
"UPDATE sync_runs SET mode = ?1, phase = ?2, surgical_iids_json = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![mode, phase, iids_json, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Update the pipeline phase and refresh the heartbeat timestamp.
|
|
pub fn update_phase(&self, conn: &Connection, phase: &str) -> Result<()> {
|
|
conn.execute(
|
|
"UPDATE sync_runs SET phase = ?1, heartbeat_at = ?2 WHERE id = ?3",
|
|
rusqlite::params![phase, now_ms(), self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Increment a surgical counter column for the given entity type and stage.
|
|
///
|
|
/// Unknown `(entity_type, stage)` combinations are silently ignored.
|
|
/// Column names are derived from a hardcoded match — no SQL injection risk.
|
|
pub fn record_entity_result(
|
|
&self,
|
|
conn: &Connection,
|
|
entity_type: &str,
|
|
stage: &str,
|
|
) -> Result<()> {
|
|
let column = match (entity_type, stage) {
|
|
("issue", "fetched") => "issues_fetched",
|
|
("issue", "ingested") => "issues_ingested",
|
|
("mr", "fetched") => "mrs_fetched",
|
|
("mr", "ingested") => "mrs_ingested",
|
|
("issue" | "mr", "skipped_stale") => "skipped_stale",
|
|
("doc", "regenerated") => "docs_regenerated",
|
|
("doc", "embedded") => "docs_embedded",
|
|
(_, "warning") => "warnings_count",
|
|
_ => return Ok(()),
|
|
};
|
|
conn.execute(
|
|
&format!("UPDATE sync_runs SET {column} = {column} + 1 WHERE id = ?1"),
|
|
rusqlite::params![self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn succeed(
|
|
self,
|
|
conn: &Connection,
|
|
metrics: &[StageTiming],
|
|
total_items: usize,
|
|
total_errors: usize,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let metrics_json = serde_json::to_string(metrics).unwrap_or_else(|_| "[]".to_string());
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET finished_at = ?1, status = 'succeeded',
|
|
metrics_json = ?2, total_items_processed = ?3, total_errors = ?4
|
|
WHERE id = ?5",
|
|
rusqlite::params![
|
|
now,
|
|
metrics_json,
|
|
total_items as i64,
|
|
total_errors as i64,
|
|
self.row_id
|
|
],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn fail(
|
|
self,
|
|
conn: &Connection,
|
|
error: &str,
|
|
metrics: Option<&[StageTiming]>,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let metrics_json =
|
|
metrics.map(|m| serde_json::to_string(m).unwrap_or_else(|_| "[]".to_string()));
|
|
conn.execute(
|
|
"UPDATE sync_runs
|
|
SET finished_at = ?1, status = 'failed', error = ?2,
|
|
metrics_json = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![now, error, metrics_json, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Finalize the run as cancelled. Consumes self to prevent further use.
|
|
pub fn cancel(self, conn: &Connection, reason: &str) -> Result<()> {
|
|
let now = now_ms();
|
|
conn.execute(
|
|
"UPDATE sync_runs SET finished_at = ?1, cancelled_at = ?2,
|
|
status = 'cancelled', error = ?3
|
|
WHERE id = ?4",
|
|
rusqlite::params![now, now, reason, self.row_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
#[path = "sync_run_tests.rs"]
|
|
mod tests;
|