feat(bd-1cjx): add lore drift command for discussion divergence detection
Implement drift detection using cosine similarity between the issue description embedding and chronological note embeddings. A sliding window (size 3) identifies topic drift points. Includes human and robot output formatters. New files: drift.rs, similarity.rs. Closes: bd-1cjx
This commit is contained in:
@@ -3,7 +3,9 @@ pub mod chunk_ids;
|
||||
pub mod chunking;
|
||||
pub mod ollama;
|
||||
pub mod pipeline;
|
||||
pub mod similarity;
|
||||
|
||||
pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
|
||||
pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
|
||||
pub use pipeline::{EmbedResult, embed_documents};
|
||||
pub use similarity::cosine_similarity;
|
||||
|
||||
48
src/embedding/similarity.rs
Normal file
48
src/embedding/similarity.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
/// Cosine similarity between two embedding vectors.
///
/// Returns a value in the `[-1, 1]` range; higher = more similar. The raw
/// quotient is clamped so floating-point rounding can never push the result
/// outside that range.
///
/// If either vector has zero magnitude (this includes empty slices), the
/// similarity is undefined and `0.0` is returned instead of dividing by zero.
///
/// # Panics
///
/// In debug builds, panics if `a` and `b` have different lengths. In release
/// builds the check is compiled out: the dot product then covers only the
/// common prefix while each norm still covers its whole slice.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len(), "embedding dimensions must match");

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();

    // A zero-length direction has no angle; report "no similarity".
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    // The f32 sums above accumulate rounding error, so the quotient may be
    // marginally beyond +/-1 for (anti-)parallel vectors. Clamp to keep the
    // documented range. `clamp` passes a NaN through unchanged.
    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing similarity scores.
    const TOLERANCE: f32 = 1e-6;

    /// Asserts that `actual` is within `TOLERANCE` of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    // A vector compared with itself points in the same direction: score 1.
    #[test]
    fn test_cosine_similarity_identical() {
        let vector = [1.0_f32, 2.0, 3.0];
        assert_close(cosine_similarity(&vector, &vector), 1.0);
    }

    // Perpendicular unit axes share no direction: score 0.
    #[test]
    fn test_cosine_similarity_orthogonal() {
        let x_axis = [1.0_f32, 0.0, 0.0];
        let y_axis = [0.0_f32, 1.0, 0.0];
        assert_close(cosine_similarity(&x_axis, &y_axis), 0.0);
    }

    // A zero-magnitude operand takes the early-return path: score 0.
    #[test]
    fn test_cosine_similarity_zero_vector() {
        let nonzero = [1.0_f32, 2.0, 3.0];
        let zero = [0.0_f32; 3];
        assert_close(cosine_similarity(&nonzero, &zero), 0.0);
    }

    // A vector and its negation point in opposite directions: score -1.
    #[test]
    fn test_cosine_similarity_opposite() {
        let forward = [1.0_f32, 2.0, 3.0];
        let backward = [-1.0_f32, -2.0, -3.0];
        assert_close(cosine_similarity(&forward, &backward), -1.0);
    }
}
|
||||
Reference in New Issue
Block a user