feat(embedding): Add Ollama-powered vector embedding pipeline

Implements the embedding module that generates vector representations of documents using a local Ollama instance with the nomic-embed-text model. These embeddings enable semantic (vector) search and the hybrid search mode that fuses lexical and semantic results via RRF.

Key components:

- embedding::ollama: HTTP client for the Ollama /api/embed endpoint, plus a health check against /api/tags. Reports connection and model problems with actionable error messages (OllamaUnavailable, OllamaModelNotFound).
- embedding::chunking: Splits long documents into overlapping paragraph-aware chunks for embedding. Chunks are capped at 32,000 bytes (CHUNK_MAX_BYTES) and adjacent chunks share a 500-byte overlap (CHUNK_OVERLAP_CHARS) to preserve cross-chunk context.
- embedding::chunk_ids: Encodes chunk identity as doc_id * 1000 + chunk_index for the embeddings table rowid. This allows vector search to map results back to documents and deduplicate by doc_id efficiently.
- embedding::change_detector: Compares document content_hash against stored embedding hashes to skip re-embedding unchanged documents, making incremental embedding runs fast.
- embedding::pipeline: Orchestrates the full embedding flow: detect changed documents, chunk them, send the chunks to Ollama in batches of 32 texts per request, validate the returned vector dimensions (768), and store the results. Failed chunks are recorded in embedding_metadata so that --retry-failed can re-attempt them.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 15:46:30 -05:00
parent 20edff4ab1
commit 723703bed9
6 changed files with 810 additions and 0 deletions
--- a/src/embedding/change_detector.rs
+++ b/src/embedding/change_detector.rs
@@ -0,0 +1,79 @@
+//! Detect documents needing (re-)embedding based on content hash changes.
+
+use rusqlite::Connection;
+
+use crate::core::error::Result;
+
+/// A document that needs embedding or re-embedding.
+///
+/// Each value is built from one row of the `documents` table as selected by
+/// `find_pending_documents`.
+#[derive(Debug)]
+pub struct PendingDocument {
+    /// Value of `documents.id`.
+    pub document_id: i64,
+    /// Value of `documents.content_text`.
+    pub content_text: String,
+    /// Value of `documents.content_hash`; the pending queries compare it with
+    /// the stored `embedding_metadata.document_hash` to detect changes.
+    pub content_hash: String,
+}
+
+/// Return one page of documents whose embeddings are missing or stale.
+///
+/// A document is pending when it has no `embedding_metadata` row for chunk 0,
+/// or when that row's `document_hash` differs from the document's current
+/// `content_hash`.
+///
+/// Paging is keyset-based: only rows with `id > last_id` are considered, in
+/// ascending `id` order, and at most `page_size` rows are returned. Pass the
+/// last `document_id` of the previous page to fetch the next one.
+pub fn find_pending_documents(
+    conn: &Connection,
+    page_size: usize,
+    last_id: i64,
+) -> Result<Vec<PendingDocument>> {
+    // The two sub-selects cover "never embedded" and "embedded from older content".
+    let sql = r#"
+        SELECT d.id, d.content_text, d.content_hash
+        FROM documents d
+        WHERE d.id > ?1
+          AND (
+            NOT EXISTS (
+                SELECT 1 FROM embedding_metadata em
+                WHERE em.document_id = d.id AND em.chunk_index = 0
+            )
+            OR EXISTS (
+                SELECT 1 FROM embedding_metadata em
+                WHERE em.document_id = d.id AND em.chunk_index = 0
+                  AND em.document_hash != d.content_hash
+            )
+          )
+        ORDER BY d.id
+        LIMIT ?2
+    "#;
+
+    let limit = page_size as i64;
+    let mut stmt = conn.prepare(sql)?;
+    let mapped = stmt.query_map(rusqlite::params![last_id, limit], |row| {
+        let document_id = row.get(0)?;
+        let content_text = row.get(1)?;
+        let content_hash = row.get(2)?;
+        Ok(PendingDocument {
+            document_id,
+            content_text,
+            content_hash,
+        })
+    })?;
+
+    // Stop at the first row that fails to decode, as a fallible collect would.
+    let mut pending = Vec::new();
+    for doc in mapped {
+        pending.push(doc?);
+    }
+
+    Ok(pending)
+}
+
+/// Count every document that currently needs embedding.
+///
+/// Applies the same predicate as `find_pending_documents` (no chunk-0
+/// metadata row, or a chunk-0 row whose hash differs from the document's
+/// `content_hash`) but without any pagination.
+pub fn count_pending_documents(conn: &Connection) -> Result<i64> {
+    let sql = r#"
+        SELECT COUNT(*)
+        FROM documents d
+        WHERE NOT EXISTS (
+            SELECT 1 FROM embedding_metadata em
+            WHERE em.document_id = d.id AND em.chunk_index = 0
+        )
+        OR EXISTS (
+            SELECT 1 FROM embedding_metadata em
+            WHERE em.document_id = d.id AND em.chunk_index = 0
+              AND em.document_hash != d.content_hash
+        )
+        "#;
+
+    let pending: i64 = conn.query_row(sql, [], |row| row.get(0))?;
+    Ok(pending)
+}
--- a/src/embedding/chunk_ids.rs
+++ b/src/embedding/chunk_ids.rs
@@ -0,0 +1,63 @@
+/// Multiplier for encoding (document_id, chunk_index) into a single rowid.
+/// Supports up to 1000 chunks per document (32M chars at 32k/chunk).
+///
+/// `encode_rowid` and `decode_rowid` only round-trip while
+/// `0 <= chunk_index < CHUNK_ROWID_MULTIPLIER`; a larger index produces a
+/// rowid that decodes to a different document.
+pub const CHUNK_ROWID_MULTIPLIER: i64 = 1000;
+
+/// Encode (document_id, chunk_index) into a sqlite-vec rowid.
+///
+/// rowid = document_id * CHUNK_ROWID_MULTIPLIER + chunk_index
+///
+/// `chunk_index` must lie in `0..CHUNK_ROWID_MULTIPLIER`. An index outside
+/// that range yields a rowid that belongs to a neighbouring document, so the
+/// precondition is checked with a `debug_assert!` (debug builds only; release
+/// builds keep the plain arithmetic).
+pub fn encode_rowid(document_id: i64, chunk_index: i64) -> i64 {
+    debug_assert!(
+        (0..CHUNK_ROWID_MULTIPLIER).contains(&chunk_index),
+        "chunk_index {chunk_index} outside 0..{CHUNK_ROWID_MULTIPLIER}"
+    );
+    document_id * CHUNK_ROWID_MULTIPLIER + chunk_index
+}
+
+/// Split a sqlite-vec rowid into its `(document_id, chunk_index)` pair.
+///
+/// The quotient by `CHUNK_ROWID_MULTIPLIER` is the document id and the
+/// remainder is the chunk index.
+pub fn decode_rowid(rowid: i64) -> (i64, i64) {
+    (
+        rowid / CHUNK_ROWID_MULTIPLIER,
+        rowid % CHUNK_ROWID_MULTIPLIER,
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Chunk 0 of document 1 lands exactly on the multiplier.
+    #[test]
+    fn test_encode_single_chunk() {
+        let rowid = encode_rowid(1, 0);
+        assert_eq!(rowid, 1000);
+    }
+
+    /// A non-zero chunk index is added on top of the document's base rowid.
+    #[test]
+    fn test_encode_multi_chunk() {
+        let rowid = encode_rowid(1, 5);
+        assert_eq!(rowid, 1005);
+    }
+
+    /// Spot-check two chunks of the same document.
+    #[test]
+    fn test_encode_specific_values() {
+        let first = encode_rowid(42, 0);
+        let sixth = encode_rowid(42, 5);
+        assert_eq!(first, 42000);
+        assert_eq!(sixth, 42005);
+    }
+
+    /// A rowid on a multiple of the multiplier decodes to chunk 0.
+    #[test]
+    fn test_decode_zero_chunk() {
+        let decoded = decode_rowid(42000);
+        assert_eq!(decoded, (42, 0));
+    }
+
+    /// Encoding then decoding returns the original pair for in-range inputs.
+    #[test]
+    fn test_decode_roundtrip() {
+        let doc_ids = [0, 1, 42, 100, 999, 10000];
+        let chunk_indices = [0, 1, 5, 99, 999];
+
+        for &doc_id in &doc_ids {
+            for &chunk_idx in &chunk_indices {
+                let decoded = decode_rowid(encode_rowid(doc_id, chunk_idx));
+                assert_eq!(
+                    decoded,
+                    (doc_id, chunk_idx),
+                    "Roundtrip failed for doc_id={doc_id}, chunk_idx={chunk_idx}"
+                );
+            }
+        }
+    }
+
+    /// Pins the multiplier; changing it would remap every stored rowid.
+    #[test]
+    fn test_multiplier_value() {
+        let multiplier = CHUNK_ROWID_MULTIPLIER;
+        assert_eq!(multiplier, 1000);
+    }
+}
--- a/src/embedding/chunking.rs
+++ b/src/embedding/chunking.rs
@@ -0,0 +1,207 @@
+//! Text chunking for embedding: split documents at paragraph boundaries with overlap.
+
+/// Maximum bytes per chunk.
+/// Named `_BYTES` because `str::len()` returns byte count; multi-byte UTF-8
+/// sequences mean byte length ≥ char count.
+pub const CHUNK_MAX_BYTES: usize = 32_000;
+
+/// Overlap between adjacent chunks.
+///
+/// Despite the `_CHARS` suffix this is applied as a byte offset. For
+/// multi-byte text the start of the next chunk is moved back to the nearest
+/// char boundary, so the shared region spans `CHUNK_OVERLAP_CHARS` to
+/// `CHUNK_OVERLAP_CHARS + 3` bytes.
+pub const CHUNK_OVERLAP_CHARS: usize = 500;
+
+/// Split document content into chunks suitable for embedding.
+///
+/// Empty input produces no chunks and documents <= CHUNK_MAX_BYTES produce a
+/// single chunk. Longer documents are split at the last paragraph boundary
+/// (`\n\n`) that fits in the window, falling back to the last sentence
+/// boundary, then the last word boundary, then a hard cut at the window end.
+/// Adjacent chunks share roughly CHUNK_OVERLAP_CHARS bytes of overlap.
+///
+/// Every slice is taken on a UTF-8 char boundary, so multi-byte content does
+/// not panic. Break points are only searched for in the part of the window
+/// that the previous chunk did not already cover, so each chunk contributes
+/// new text rather than repeating the overlap alone.
+///
+/// Returns Vec<(chunk_index, chunk_text)> with indices counting up from 0.
+pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
+    if content.is_empty() {
+        return Vec::new();
+    }
+
+    if content.len() <= CHUNK_MAX_BYTES {
+        return vec![(0, content.to_string())];
+    }
+
+    let mut chunks: Vec<(usize, String)> = Vec::new();
+    // Byte offset where the next chunk begins; always a char boundary.
+    let mut start = 0;
+    // Number of bytes at `start` that the previous chunk already contains.
+    let mut carried = 0;
+
+    while start < content.len() {
+        let chunk_index = chunks.len();
+        let remaining = &content[start..];
+        if remaining.len() <= CHUNK_MAX_BYTES {
+            chunks.push((chunk_index, remaining.to_string()));
+            break;
+        }
+
+        // Largest window that fits the byte budget without cutting a char.
+        let end = floor_char_boundary(content, start + CHUNK_MAX_BYTES);
+        let window = &content[start..end];
+
+        // Only look for a break in text this chunk adds; a break inside the
+        // carried overlap would yield a chunk made purely of repeated text.
+        let fresh = &window[carried..];
+        let split_at = carried
+            + find_paragraph_break(fresh)
+                .or_else(|| find_sentence_break(fresh))
+                .or_else(|| find_word_break(fresh))
+                .unwrap_or(fresh.len());
+
+        chunks.push((chunk_index, window[..split_at].to_string()));
+
+        // Step back by the overlap, snapped to a char boundary. If that would
+        // not move past `start`, skip the overlap so the loop always advances.
+        let chunk_end = start + split_at;
+        let rewound =
+            floor_char_boundary(content, chunk_end.saturating_sub(CHUNK_OVERLAP_CHARS));
+        let next_start = if rewound > start { rewound } else { chunk_end };
+        carried = chunk_end - next_start;
+        start = next_start;
+    }
+
+    chunks
+}
+
+/// Byte offset just past the last paragraph break (`\n\n`) in `window`.
+fn find_paragraph_break(window: &str) -> Option<usize> {
+    window.rfind("\n\n").map(|pos| pos + 2)
+}
+
+/// Byte offset just past the last sentence boundary (`. `, `? `, `! `) in
+/// `window`, whichever of the three occurs latest.
+fn find_sentence_break(window: &str) -> Option<usize> {
+    [". ", "? ", "! "]
+        .iter()
+        .filter_map(|pat| window.rfind(pat).map(|pos| pos + pat.len()))
+        .max()
+}
+
+/// Byte offset just past the last word boundary (space) in `window`.
+fn find_word_break(window: &str) -> Option<usize> {
+    window.rfind(' ').map(|pos| pos + 1)
+}
+
+/// Find the largest byte index <= `idx` that is a valid char boundary in `s`.
+/// Indices at or beyond the end of `s` clamp to `s.len()`.
+///
+/// Mirrors the standard library's `str::floor_char_boundary`; kept as a local
+/// helper so the module does not depend on a toolchain that provides it.
+fn floor_char_boundary(s: &str, idx: usize) -> usize {
+    if idx >= s.len() {
+        return s.len();
+    }
+    let mut i = idx;
+    // Index 0 is always a boundary, so the walk terminates.
+    while i > 0 && !s.is_char_boundary(i) {
+        i -= 1;
+    }
+    i
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Empty input yields no chunks at all.
+    #[test]
+    fn test_empty_content() {
+        let chunks = split_into_chunks("");
+        assert!(chunks.is_empty());
+    }
+
+    /// Content under the limit comes back verbatim as chunk 0.
+    #[test]
+    fn test_short_document_single_chunk() {
+        let content = "Short document content.";
+        let chunks = split_into_chunks(content);
+        assert_eq!(chunks.len(), 1);
+        assert_eq!(chunks[0].0, 0);
+        assert_eq!(chunks[0].1, content);
+    }
+
+    /// The limit is inclusive: exactly CHUNK_MAX_BYTES is still one chunk.
+    #[test]
+    fn test_exactly_max_chars() {
+        let content = "a".repeat(CHUNK_MAX_BYTES);
+        let chunks = split_into_chunks(&content);
+        assert_eq!(chunks.len(), 1);
+    }
+
+    #[test]
+    fn test_long_document_multiple_chunks() {
+        // Create content > CHUNK_MAX_BYTES with paragraph boundaries
+        let paragraph = "This is a paragraph of text.\n\n";
+        let mut content = String::new();
+        while content.len() < CHUNK_MAX_BYTES * 2 {
+            content.push_str(paragraph);
+        }
+
+        let chunks = split_into_chunks(&content);
+        assert!(chunks.len() >= 2, "Expected multiple chunks, got {}", chunks.len());
+
+        // Verify indices are sequential
+        for (i, (idx, _)) in chunks.iter().enumerate() {
+            assert_eq!(*idx, i);
+        }
+
+        // Verify the chunks span the input: the first is a prefix, the last is
+        // a suffix, none is empty or oversized, and together they are at least
+        // as long as the input (overlap can only add bytes).
+        assert!(content.starts_with(chunks[0].1.as_str()));
+        assert!(content.ends_with(chunks.last().unwrap().1.as_str()));
+        for (_, chunk) in &chunks {
+            assert!(!chunk.is_empty());
+            assert!(chunk.len() <= CHUNK_MAX_BYTES);
+        }
+        let total: usize = chunks.iter().map(|(_, chunk)| chunk.len()).sum();
+        assert!(total >= content.len());
+    }
+
+    #[test]
+    fn test_chunk_overlap() {
+        // Create content that will produce 2+ chunks
+        let paragraph = "This is paragraph content for testing chunk overlap behavior.\n\n";
+        let mut content = String::new();
+        while content.len() < CHUNK_MAX_BYTES + CHUNK_OVERLAP_CHARS + 1000 {
+            content.push_str(paragraph);
+        }
+
+        let chunks = split_into_chunks(&content);
+        assert!(chunks.len() >= 2);
+
+        // The second chunk must begin with the final CHUNK_OVERLAP_CHARS bytes
+        // of the first one (the input is ASCII, so bytes == chars here).
+        let end_of_first = &chunks[0].1;
+        let start_of_second = &chunks[1].1;
+        assert!(end_of_first.len() > CHUNK_OVERLAP_CHARS);
+        let overlap_region = &end_of_first[end_of_first.len() - CHUNK_OVERLAP_CHARS..];
+        assert!(
+            start_of_second.starts_with(overlap_region),
+            "Expected overlap between chunks"
+        );
+    }
+
+    #[test]
+    fn test_no_paragraph_boundary() {
+        // Create content without paragraph breaks
+        let content = "word ".repeat(CHUNK_MAX_BYTES / 5 * 3);
+        let chunks = split_into_chunks(&content);
+        assert!(chunks.len() >= 2);
+        // Should still split (at word boundaries)
+        for (_, chunk) in &chunks {
+            assert!(!chunk.is_empty());
+        }
+    }
+
+    #[test]
+    fn test_chunk_indices_sequential() {
+        let content = "a ".repeat(CHUNK_MAX_BYTES);
+        let chunks = split_into_chunks(&content);
+        for (i, (idx, _)) in chunks.iter().enumerate() {
+            assert_eq!(*idx, i, "Chunk index mismatch at position {}", i);
+        }
+    }
+}
--- a/src/embedding/mod.rs
+++ b/src/embedding/mod.rs
@@ -0,0 +1,9 @@
+pub mod change_detector;
+pub mod chunk_ids;
+pub mod chunking;
+pub mod ollama;
+pub mod pipeline;
+
+pub use change_detector::{count_pending_documents, find_pending_documents, PendingDocument};
+pub use chunking::{split_into_chunks, CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS};
+pub use pipeline::{embed_documents, EmbedResult};
--- a/src/embedding/ollama.rs
+++ b/src/embedding/ollama.rs
@@ -0,0 +1,201 @@
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+use crate::core::error::{LoreError, Result};
+
+/// Configuration for Ollama embedding service.
+///
+/// Derives `Debug` and `Clone` so a configuration can be logged and reused
+/// for more than one client.
+#[derive(Debug, Clone)]
+pub struct OllamaConfig {
+    /// Root URL of the Ollama server; API paths such as `/api/tags` are
+    /// appended to it.
+    pub base_url: String,
+    /// Model name sent with every embed request and looked up by the health check.
+    pub model: String,
+    /// Per-request timeout, in seconds, applied to the HTTP client.
+    pub timeout_secs: u64,
+}
+
+/// Defaults: `http://localhost:11434`, model `nomic-embed-text`, 60-second
+/// timeout.
+impl Default for OllamaConfig {
+    fn default() -> Self {
+        let base_url = String::from("http://localhost:11434");
+        let model = String::from("nomic-embed-text");
+        Self {
+            base_url,
+            model,
+            timeout_secs: 60,
+        }
+    }
+}
+
+/// Async client for Ollama embedding API.
+pub struct OllamaClient {
+    /// `reqwest` client built in `new` with the timeout taken from `config`.
+    client: Client,
+    /// Base URL, model name, and timeout used by every request.
+    config: OllamaConfig,
+}
+
+/// JSON body posted to `{base_url}/api/embed` by `embed_batch`.
+#[derive(Serialize)]
+struct EmbedRequest {
+    /// Name of the embedding model to run.
+    model: String,
+    /// Texts to embed in this request.
+    input: Vec<String>,
+}
+
+/// JSON body parsed from the `/api/embed` response in `embed_batch`.
+#[derive(Deserialize)]
+struct EmbedResponse {
+    // Deserialized but never read by this module, hence the lint allowance.
+    #[allow(dead_code)]
+    model: String,
+    /// Embedding vectors; `embed_batch` returns them to the caller unchanged.
+    embeddings: Vec<Vec<f32>>,
+}
+
+/// JSON body parsed from the `/api/tags` response in `health_check`.
+#[derive(Deserialize)]
+struct TagsResponse {
+    /// Models reported by the server.
+    models: Vec<ModelInfo>,
+}
+
+/// One entry of `TagsResponse::models`.
+#[derive(Deserialize)]
+struct ModelInfo {
+    /// Model name as reported by the server, e.g. `nomic-embed-text:latest`.
+    name: String,
+}
+
+impl OllamaClient {
+    /// Build a client whose HTTP requests time out after
+    /// `config.timeout_secs` seconds.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the underlying `reqwest` client cannot be constructed.
+    pub fn new(config: OllamaConfig) -> Self {
+        let client = Client::builder()
+            .timeout(Duration::from_secs(config.timeout_secs))
+            .build()
+            .expect("Failed to create HTTP client");
+
+        Self { client, config }
+    }
+
+    /// Join an API path onto the configured base URL, tolerating a trailing
+    /// slash on `base_url`.
+    fn endpoint(&self, path: &str) -> String {
+        format!("{}/{}", self.config.base_url.trim_end_matches('/'), path)
+    }
+
+    /// Whether an installed model name satisfies the configured one: either an
+    /// exact match, or the configured name followed by a `:tag` suffix.
+    fn model_matches(configured: &str, installed: &str) -> bool {
+        installed == configured
+            || installed
+                .strip_prefix(configured)
+                .map_or(false, |rest| rest.starts_with(':'))
+    }
+
+    /// Health check: verifies Ollama is reachable and the configured model exists.
+    ///
+    /// "nomic-embed-text" matches "nomic-embed-text" and
+    /// "nomic-embed-text:latest", but not a different model that merely shares
+    /// the prefix (for example "nomic-embed-text-v2:latest").
+    ///
+    /// # Errors
+    ///
+    /// - `LoreError::OllamaUnavailable` when the request fails, the server
+    ///   answers with a non-success status, or the body is not the expected JSON.
+    /// - `LoreError::OllamaModelNotFound` when no installed model matches.
+    pub async fn health_check(&self) -> Result<()> {
+        let url = self.endpoint("api/tags");
+
+        let response = self
+            .client
+            .get(&url)
+            .send()
+            .await
+            // Treat 4xx/5xx as "unavailable" instead of trying to parse the body.
+            .and_then(|resp| resp.error_for_status())
+            .map_err(|e| LoreError::OllamaUnavailable {
+                base_url: self.config.base_url.clone(),
+                source: Some(e),
+            })?;
+
+        let tags: TagsResponse =
+            response
+                .json()
+                .await
+                .map_err(|e| LoreError::OllamaUnavailable {
+                    base_url: self.config.base_url.clone(),
+                    source: Some(e),
+                })?;
+
+        let model_found = tags
+            .models
+            .iter()
+            .any(|m| Self::model_matches(&self.config.model, &m.name));
+
+        if !model_found {
+            return Err(LoreError::OllamaModelNotFound {
+                model: self.config.model.clone(),
+            });
+        }
+
+        Ok(())
+    }
+
+    /// Embed a batch of texts using the configured model.
+    ///
+    /// Returns one embedding vector per input text, in input order as
+    /// delivered by the server. An empty `texts` returns an empty vector
+    /// without contacting the server.
+    ///
+    /// # Errors
+    ///
+    /// - `LoreError::OllamaUnavailable` when the request cannot be sent.
+    /// - `LoreError::EmbeddingFailed` when the server answers with a
+    ///   non-success status, the body cannot be parsed, or the number of
+    ///   returned embeddings differs from the number of inputs. The client
+    ///   does not know which document a text belongs to, so `document_id` is
+    ///   always 0 in these errors.
+    pub async fn embed_batch(&self, texts: Vec<String>) -> Result<Vec<Vec<f32>>> {
+        if texts.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let expected = texts.len();
+        let url = self.endpoint("api/embed");
+
+        let request = EmbedRequest {
+            model: self.config.model.clone(),
+            input: texts,
+        };
+
+        let response = self.client.post(&url).json(&request).send().await.map_err(
+            |e| LoreError::OllamaUnavailable {
+                base_url: self.config.base_url.clone(),
+                source: Some(e),
+            },
+        )?;
+
+        let status = response.status();
+        if !status.is_success() {
+            // Best effort: the body is only used to enrich the error message.
+            let body = response.text().await.unwrap_or_default();
+            return Err(LoreError::EmbeddingFailed {
+                document_id: 0,
+                reason: format!("HTTP {}: {}", status, body),
+            });
+        }
+
+        let embed_response: EmbedResponse =
+            response
+                .json()
+                .await
+                .map_err(|e| LoreError::EmbeddingFailed {
+                    document_id: 0,
+                    reason: format!("Failed to parse embed response: {}", e),
+                })?;
+
+        // Callers pair embeddings with inputs by position, so a count mismatch
+        // would attach vectors to the wrong texts or drop some silently.
+        let received = embed_response.embeddings.len();
+        if received != expected {
+            return Err(LoreError::EmbeddingFailed {
+                document_id: 0,
+                reason: format!(
+                    "Expected {} embeddings, got {}",
+                    expected, received
+                ),
+            });
+        }
+
+        Ok(embed_response.embeddings)
+    }
+}
+
+/// Quick health check without creating a full client.
+///
+/// Sends a GET to `{base_url}/api/tags` with a 5-second timeout and returns
+/// `true` only when the server answers with a success (2xx) status. Any
+/// failure to build the client, send the request, or get a success status
+/// yields `false`. A trailing slash on `base_url` is tolerated.
+pub async fn check_ollama_health(base_url: &str) -> bool {
+    let Ok(client) = Client::builder().timeout(Duration::from_secs(5)).build() else {
+        return false;
+    };
+
+    let url = format!("{}/api/tags", base_url.trim_end_matches('/'));
+    match client.get(&url).send().await {
+        // An error page from a proxy or another service is not a healthy Ollama.
+        Ok(response) => response.status().is_success(),
+        Err(_) => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// `OllamaConfig::default()` points at a local server with the stock model.
+    #[test]
+    fn test_config_defaults() {
+        let OllamaConfig {
+            base_url,
+            model,
+            timeout_secs,
+        } = OllamaConfig::default();
+        assert_eq!(base_url, "http://localhost:11434");
+        assert_eq!(model, "nomic-embed-text");
+        assert_eq!(timeout_secs, 60);
+    }
+
+    /// Prefix matching: a bare model name matches its `:latest` tag, while an
+    /// unrelated model name does not.
+    #[test]
+    fn test_health_check_model_starts_with() {
+        let tag_name = "nomic-embed-text:latest";
+
+        let matches_configured = tag_name.starts_with("nomic-embed-text");
+        let matches_other = tag_name.starts_with("llama2");
+
+        assert!(matches_configured);
+        assert!(!matches_other);
+    }
+
+    /// The request serializes to the `model` / `input` JSON shape.
+    #[test]
+    fn test_embed_request_serialization() {
+        let input: Vec<String> = ["hello", "world"].iter().map(|s| s.to_string()).collect();
+        let request = EmbedRequest {
+            model: "nomic-embed-text".to_string(),
+            input,
+        };
+        let json = serde_json::to_string(&request).unwrap();
+        for fragment in [
+            "\"model\":\"nomic-embed-text\"",
+            "\"input\":[\"hello\",\"world\"]",
+        ] {
+            assert!(json.contains(fragment));
+        }
+    }
+
+    /// A two-vector response deserializes with both vectors intact and in order.
+    #[test]
+    fn test_embed_response_deserialization() {
+        let json = r#"{"model":"nomic-embed-text","embeddings":[[0.1,0.2,0.3],[0.4,0.5,0.6]]}"#;
+        let response: EmbedResponse = serde_json::from_str(json).unwrap();
+        assert_eq!(
+            response.embeddings,
+            vec![vec![0.1, 0.2, 0.3], vec![0.4, 0.5, 0.6]]
+        );
+    }
+}
--- a/src/embedding/pipeline.rs
+++ b/src/embedding/pipeline.rs
@@ -0,0 +1,251 @@
+//! Async embedding pipeline: chunk documents, embed via Ollama, store in sqlite-vec.
+
+use rusqlite::Connection;
+use sha2::{Digest, Sha256};
+use tracing::{info, warn};
+
+use crate::core::error::Result;
+use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
+use crate::embedding::chunk_ids::encode_rowid;
+use crate::embedding::chunking::split_into_chunks;
+use crate::embedding::ollama::OllamaClient;
+
+/// Maximum number of chunk texts sent to Ollama in one `embed_batch` call.
+const BATCH_SIZE: usize = 32;
+/// Maximum number of pending documents fetched per keyset page.
+const DB_PAGE_SIZE: usize = 500;
+/// Vector length every returned embedding must have; other lengths are
+/// recorded as failures. It is also the value written to the `dims` column.
+// NOTE(review): presumably the output size of the default model — confirm.
+const EXPECTED_DIMS: usize = 768;
+
+/// Result of an embedding run.
+///
+/// Note the mixed units: `embedded` and `failed` count chunks, while
+/// `skipped` counts documents.
+#[derive(Debug, Default)]
+pub struct EmbedResult {
+    /// Chunks whose embedding was stored.
+    pub embedded: usize,
+    /// Chunks for which an error was recorded instead of an embedding.
+    pub failed: usize,
+    /// Documents skipped because their `content_text` was empty.
+    pub skipped: usize,
+}
+
+/// Work item: a single chunk to embed.
+struct ChunkWork {
+    /// Id of the document the chunk was split from.
+    doc_id: i64,
+    /// Position of the chunk within its document, as returned by
+    /// `split_into_chunks`.
+    chunk_index: usize,
+    /// `content_hash` of the parent document when the chunk was queued.
+    doc_hash: String,
+    /// SHA-256 hex digest of `text`.
+    chunk_hash: String,
+    /// Chunk text sent to Ollama.
+    text: String,
+}
+
+/// Run the embedding pipeline: find pending documents, chunk, embed, store.
+///
+/// Processes batches of BATCH_SIZE texts per Ollama API call.
+/// Uses keyset pagination over documents (DB_PAGE_SIZE per page).
+///
+/// For every non-empty pending document the existing embeddings are cleared
+/// before its chunks are queued. Documents with empty `content_text` are
+/// counted as skipped; no metadata is written for them.
+///
+/// `progress_callback`, when given, is invoked as `(processed, total)` once
+/// per document, including skipped ones, where `total` is the pending count
+/// taken before the run starts.
+///
+/// Per-chunk failures are recorded through `record_embedding_error` and
+/// counted in `EmbedResult::failed` rather than aborting the run:
+/// - the Ollama request for the chunk's batch failed,
+/// - the batch returned fewer embeddings than texts,
+/// - the embedding does not have EXPECTED_DIMS dimensions,
+/// - the chunk index cannot be encoded into a rowid without colliding with
+///   another document.
+///
+/// # Errors
+///
+/// Returns an error only when a database operation fails.
+pub async fn embed_documents(
+    conn: &Connection,
+    client: &OllamaClient,
+    model_name: &str,
+    progress_callback: Option<Box<dyn Fn(usize, usize)>>,
+) -> Result<EmbedResult> {
+    let total = count_pending_documents(conn)? as usize;
+    let mut result = EmbedResult::default();
+
+    if total == 0 {
+        return Ok(result);
+    }
+
+    // Chunk indices at or above this value would be encoded into the rowid
+    // range of the next document and overwrite its embeddings.
+    let max_chunks = crate::embedding::chunk_ids::CHUNK_ROWID_MULTIPLIER as usize;
+
+    let mut last_id: i64 = 0;
+    let mut processed: usize = 0;
+
+    info!(total, "Starting embedding pipeline");
+
+    loop {
+        let pending = find_pending_documents(conn, DB_PAGE_SIZE, last_id)?;
+        if pending.is_empty() {
+            break;
+        }
+
+        // Build chunk work items for this page
+        let mut all_chunks: Vec<ChunkWork> = Vec::new();
+
+        for doc in &pending {
+            // Always advance the cursor, even for skipped docs, to avoid re-fetching
+            last_id = doc.document_id;
+
+            if doc.content_text.is_empty() {
+                result.skipped += 1;
+            } else {
+                // Clear existing embeddings for this document before re-embedding
+                clear_document_embeddings(conn, doc.document_id)?;
+
+                for (chunk_index, text) in split_into_chunks(&doc.content_text) {
+                    let chunk_hash = sha256_hash(&text);
+
+                    if chunk_index >= max_chunks {
+                        warn!(
+                            doc_id = doc.document_id,
+                            chunk_index,
+                            max_chunks,
+                            "Chunk index exceeds rowid encoding range, skipping"
+                        );
+                        record_embedding_error(
+                            conn,
+                            doc.document_id,
+                            chunk_index,
+                            &doc.content_hash,
+                            &chunk_hash,
+                            model_name,
+                            &format!(
+                                "Chunk index {} exceeds the maximum of {} chunks per document",
+                                chunk_index, max_chunks
+                            ),
+                        )?;
+                        result.failed += 1;
+                        continue;
+                    }
+
+                    all_chunks.push(ChunkWork {
+                        doc_id: doc.document_id,
+                        chunk_index,
+                        doc_hash: doc.content_hash.clone(),
+                        chunk_hash,
+                        text,
+                    });
+                }
+            }
+
+            // Track progress per document (not per chunk) to match `total`.
+            // Skipped documents report too, so progress can reach `total`.
+            processed += 1;
+            if let Some(ref cb) = progress_callback {
+                cb(processed, total);
+            }
+        }
+
+        // Process chunks in batches of BATCH_SIZE
+        for batch in all_chunks.chunks(BATCH_SIZE) {
+            let texts: Vec<String> = batch.iter().map(|c| c.text.clone()).collect();
+
+            match client.embed_batch(texts).await {
+                Ok(embeddings) => {
+                    if embeddings.len() != batch.len() {
+                        warn!(
+                            expected = batch.len(),
+                            got = embeddings.len(),
+                            "Embedding count does not match batch size"
+                        );
+                    }
+
+                    // Pair by position; surplus embeddings are ignored by `zip`.
+                    for (chunk, embedding) in batch.iter().zip(&embeddings) {
+                        if embedding.len() != EXPECTED_DIMS {
+                            warn!(
+                                doc_id = chunk.doc_id,
+                                chunk_index = chunk.chunk_index,
+                                got_dims = embedding.len(),
+                                expected = EXPECTED_DIMS,
+                                "Dimension mismatch, skipping"
+                            );
+                            record_embedding_error(
+                                conn,
+                                chunk.doc_id,
+                                chunk.chunk_index,
+                                &chunk.doc_hash,
+                                &chunk.chunk_hash,
+                                model_name,
+                                &format!(
+                                    "Dimension mismatch: got {}, expected {}",
+                                    embedding.len(),
+                                    EXPECTED_DIMS
+                                ),
+                            )?;
+                            result.failed += 1;
+                            continue;
+                        }
+
+                        store_embedding(
+                            conn,
+                            chunk.doc_id,
+                            chunk.chunk_index,
+                            &chunk.doc_hash,
+                            &chunk.chunk_hash,
+                            model_name,
+                            embedding,
+                        )?;
+                        result.embedded += 1;
+                    }
+
+                    // Chunks left without an embedding must not vanish silently.
+                    for chunk in batch.iter().skip(embeddings.len()) {
+                        record_embedding_error(
+                            conn,
+                            chunk.doc_id,
+                            chunk.chunk_index,
+                            &chunk.doc_hash,
+                            &chunk.chunk_hash,
+                            model_name,
+                            "No embedding returned for chunk",
+                        )?;
+                        result.failed += 1;
+                    }
+                }
+                Err(e) => {
+                    warn!(error = %e, "Batch embedding failed");
+                    let reason = e.to_string();
+                    for chunk in batch {
+                        record_embedding_error(
+                            conn,
+                            chunk.doc_id,
+                            chunk.chunk_index,
+                            &chunk.doc_hash,
+                            &chunk.chunk_hash,
+                            model_name,
+                            &reason,
+                        )?;
+                        result.failed += 1;
+                    }
+                }
+            }
+        }
+    }
+
+    info!(
+        embedded = result.embedded,
+        failed = result.failed,
+        skipped = result.skipped,
+        "Embedding pipeline complete"
+    );
+
+    Ok(result)
+}
+
+/// Remove every stored embedding and metadata row belonging to one document.
+///
+/// Metadata rows are matched by `document_id`. Embedding rows are matched by
+/// the half-open rowid range from the document's chunk 0 up to, but not
+/// including, chunk 0 of the next document id.
+fn clear_document_embeddings(conn: &Connection, document_id: i64) -> Result<()> {
+    conn.execute(
+        "DELETE FROM embedding_metadata WHERE document_id = ?1",
+        rusqlite::params![document_id],
+    )?;
+
+    let first_rowid = encode_rowid(document_id, 0);
+    let next_document_rowid = encode_rowid(document_id + 1, 0);
+    let rowid_range = rusqlite::params![first_rowid, next_document_rowid];
+    conn.execute(
+        "DELETE FROM embeddings WHERE rowid >= ?1 AND rowid < ?2",
+        rowid_range,
+    )?;
+
+    Ok(())
+}
+
+/// Persist one chunk's embedding together with its metadata row.
+///
+/// The vector is written to `embeddings` as consecutive little-endian `f32`
+/// bytes under the rowid produced by `encode_rowid`. The metadata row is
+/// written with `attempt_count = 1`, `last_error = NULL`, and the current UTC
+/// time in milliseconds as `created_at`. Both statements use
+/// `INSERT OR REPLACE`.
+fn store_embedding(
+    conn: &Connection,
+    doc_id: i64,
+    chunk_index: usize,
+    doc_hash: &str,
+    chunk_hash: &str,
+    model_name: &str,
+    embedding: &[f32],
+) -> Result<()> {
+    let chunk_index = chunk_index as i64;
+    let rowid = encode_rowid(doc_id, chunk_index);
+
+    let mut embedding_bytes: Vec<u8> = Vec::with_capacity(embedding.len() * 4);
+    for value in embedding {
+        embedding_bytes.extend_from_slice(&value.to_le_bytes());
+    }
+
+    conn.execute(
+        "INSERT OR REPLACE INTO embeddings (rowid, embedding) VALUES (?1, ?2)",
+        rusqlite::params![rowid, embedding_bytes],
+    )?;
+
+    let now = chrono::Utc::now().timestamp_millis();
+    let dims = EXPECTED_DIMS as i64;
+    conn.execute(
+        "INSERT OR REPLACE INTO embedding_metadata
+         (document_id, chunk_index, model, dims, document_hash, chunk_hash,
+          created_at, attempt_count, last_error)
+         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL)",
+        rusqlite::params![doc_id, chunk_index, model_name, dims, doc_hash, chunk_hash, now],
+    )?;
+
+    Ok(())
+}
+
+/// Note a failed embedding attempt for one chunk in `embedding_metadata`.
+///
+/// Inserts a row with `attempt_count = 1` and the error text. When a row for
+/// the same `(document_id, chunk_index)` already exists, only
+/// `attempt_count` (incremented), `last_error`, and `last_attempt_at` are
+/// updated. Timestamps are the current UTC time in milliseconds.
+fn record_embedding_error(
+    conn: &Connection,
+    doc_id: i64,
+    chunk_index: usize,
+    doc_hash: &str,
+    chunk_hash: &str,
+    model_name: &str,
+    error: &str,
+) -> Result<()> {
+    let attempted_at = chrono::Utc::now().timestamp_millis();
+    let chunk_index = chunk_index as i64;
+    let dims = EXPECTED_DIMS as i64;
+
+    conn.execute(
+        "INSERT INTO embedding_metadata
+         (document_id, chunk_index, model, dims, document_hash, chunk_hash,
+          created_at, attempt_count, last_error, last_attempt_at)
+         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, ?8, ?7)
+         ON CONFLICT(document_id, chunk_index) DO UPDATE SET
+           attempt_count = embedding_metadata.attempt_count + 1,
+           last_error = ?8,
+           last_attempt_at = ?7",
+        rusqlite::params![doc_id, chunk_index, model_name, dims, doc_hash, chunk_hash, attempted_at, error],
+    )?;
+
+    Ok(())
+}
+
+/// Lowercase hexadecimal SHA-256 digest of the UTF-8 bytes of `input`.
+fn sha256_hash(input: &str) -> String {
+    let digest = Sha256::digest(input.as_bytes());
+    format!("{:x}", digest)
+}