diff --git a/migrations/010_chunk_config.sql b/migrations/010_chunk_config.sql
new file mode 100644
index 0000000..952df7f
--- /dev/null
+++ b/migrations/010_chunk_config.sql
@@ -0,0 +1,14 @@
+-- Migration 010: Chunk config tracking + adaptive dedup support
+-- Schema version: 10 (recorded in schema_version by the INSERT at the end of this file)
+
+ALTER TABLE embedding_metadata ADD COLUMN chunk_max_bytes INTEGER;
+ALTER TABLE embedding_metadata ADD COLUMN chunk_count INTEGER;
+
+-- Partial index restricted to sentinel rows (chunk_index = 0): accelerates drift detection and adaptive dedup queries
+CREATE INDEX idx_embedding_metadata_sentinel
+    ON embedding_metadata(document_id, chunk_index)
+    WHERE chunk_index = 0;
+
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (10, strftime('%s', 'now') * 1000,
+        'Add chunk_max_bytes and chunk_count to embedding_metadata');
diff --git a/src/core/db.rs b/src/core/db.rs
index a95ccf8..ad39bdf 100644
--- a/src/core/db.rs
+++ b/src/core/db.rs
@@ -10,6 +10,10 @@ use tracing::{debug, info};
 
 use super::error::{LoreError, Result};
 
+/// Latest schema version: the embedded migration count (assumes gap-free numbering from 001).
+/// Used by the health check to verify databases are up-to-date.
+pub const LATEST_SCHEMA_VERSION: i32 = MIGRATIONS.len() as i32;
+
 /// Embedded migrations - compiled into the binary.
 const MIGRATIONS: &[(&str, &str)] = &[
     ("001", include_str!("../../migrations/001_initial.sql")),
@@ -39,6 +43,10 @@ const MIGRATIONS: &[(&str, &str)] = &[
         "009",
         include_str!("../../migrations/009_embeddings.sql"),
     ),
+    (
+        "010",
+        include_str!("../../migrations/010_chunk_config.sql"),
+    ),
 ];
 
 /// Create a database connection with production-grade pragmas.