feat(bd-1cjx): add lore drift command for discussion divergence detection

Implement drift detection using cosine similarity between the issue description
embedding and the chronologically ordered note embeddings. A sliding window
(size 3) identifies topic drift points. Includes human and robot output formatters.

New files: drift.rs, similarity.rs
Closes: bd-1cjx
This commit is contained in:
teernisse
2026-02-12 11:34:10 -05:00
parent b29c382583
commit 47eecce8e9
6 changed files with 761 additions and 11 deletions

View File

@@ -3,7 +3,9 @@ pub mod chunk_ids;
pub mod chunking;
pub mod ollama;
pub mod pipeline;
pub mod similarity;
pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
pub use pipeline::{EmbedResult, embed_documents};
pub use similarity::cosine_similarity;

View File

@@ -0,0 +1,48 @@
/// Cosine similarity between two embedding vectors.
///
/// Returns a value in the `[-1, 1]` range; higher = more similar. If either
/// vector has zero magnitude (including an empty slice) the similarity is
/// undefined and `0.0` is returned instead.
///
/// Products are accumulated in `f64` so that squaring very large or very
/// small `f32` components cannot overflow to infinity or underflow to zero,
/// and the final ratio is clamped so rounding error never pushes it outside
/// `[-1, 1]`. Non-finite components (NaN / infinity) are not sanitized and
/// typically produce NaN.
///
/// # Panics
///
/// In debug builds, panics if `a` and `b` differ in length. Release builds do
/// not check: the dot product then covers only the common prefix while each
/// norm covers its whole slice.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len(), "embedding dimensions must match");

    // Euclidean norm with f64 accumulation; widening an f32 is lossless.
    let norm = |v: &[f32]| -> f64 {
        v.iter()
            .map(|&x| f64::from(x) * f64::from(x))
            .sum::<f64>()
            .sqrt()
    };

    let dot: f64 = a
        .iter()
        .zip(b)
        .map(|(&x, &y)| f64::from(x) * f64::from(y))
        .sum();
    let norm_a = norm(a);
    let norm_b = norm(b);

    // A zero-magnitude vector has no direction; report "no similarity".
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    // Clamp before narrowing so rounding cannot yield e.g. 1.0000001.
    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing similarity scores.
    const TOLERANCE: f32 = 1e-6;

    /// Asserts that `actual` is within `TOLERANCE` of `expected`.
    #[track_caller]
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    /// A vector compared with itself scores 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let v = [1.0, 2.0, 3.0];
        assert_close(cosine_similarity(&v, &v), 1.0);
    }

    /// Perpendicular unit vectors score 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let x_axis = [1.0, 0.0, 0.0];
        let y_axis = [0.0, 1.0, 0.0];
        assert_close(cosine_similarity(&x_axis, &y_axis), 0.0);
    }

    /// A zero-magnitude operand yields 0.
    #[test]
    fn test_cosine_similarity_zero_vector() {
        let v = [1.0, 2.0, 3.0];
        let zero = [0.0, 0.0, 0.0];
        assert_close(cosine_similarity(&v, &zero), 0.0);
    }

    /// A vector compared with its negation scores -1.
    #[test]
    fn test_cosine_similarity_opposite() {
        let v = [1.0, 2.0, 3.0];
        let negated = [-1.0, -2.0, -3.0];
        assert_close(cosine_similarity(&v, &negated), -1.0);
    }
}