feat(db): Add migrations for documents, FTS5, and embeddings
Three new migrations establish the search infrastructure:

- 007_documents: Creates the `documents` table as the central search unit. Each document is a rendered text blob derived from an issue, MR, or discussion. Includes the `dirty_queue` table for tracking which entities need document regeneration after ingestion changes.
- 008_fts5: Creates the FTS5 virtual table `documents_fts` with content sync triggers. Uses the `porter unicode61` tokenizer (Porter stemming on top of Unicode-aware tokenization) with 2-, 3-, and 4-character prefix indexes for type-ahead. Automatic insert/update/delete triggers keep the FTS index synchronized with the `documents` table.
- 009_embeddings: Creates the `embeddings` table for storing vector chunks produced by Ollama. Uses `doc_id * 1000 + chunk_index` rowid encoding to support multi-chunk documents while enabling efficient doc-level deduplication in vector search results.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
42
migrations/008_fts5.sql
Normal file
42
migrations/008_fts5.sql
Normal file
@@ -0,0 +1,42 @@
|
||||
-- Migration 008: FTS5 Full-Text Search Index
|
||||
-- Schema version: 8
|
||||
-- Adds full-text search on documents table with sync triggers
|
||||
|
||||
-- Full-text index over the searchable text of `documents`.
-- This is an FTS5 external-content table: the indexed text is read from
-- `documents` (matched by documents.id), so the index has to be kept in step
-- with that table explicitly.
CREATE VIRTUAL TABLE documents_fts USING fts5(
    -- Indexed columns; they mirror the columns of the same name in `documents`.
    title,
    content_text,

    -- External content source and the column that supplies the FTS rowid.
    content='documents',
    content_rowid='id',

    -- Porter stemming layered on the unicode61 tokenizer.
    tokenize='porter unicode61',

    -- Prefix indexes for 2-, 3- and 4-character prefixes (type-ahead queries).
    prefix='2 3 4'
);
|
||||
|
||||
-- Triggers keep documents_fts synchronized with `documents`.
-- IMPORTANT: title is always passed through COALESCE(title, '') so the index
-- never receives a NULL title. The FTS5 'delete' command must be given the
-- same values that were supplied when the row was indexed; normalizing NULL to
-- '' on every path keeps the insert-side and delete-side values identical.

-- Index a newly inserted document.
CREATE TRIGGER documents_ai
AFTER INSERT ON documents
BEGIN
    INSERT INTO documents_fts (rowid, title, content_text)
    VALUES (
        new.id,
        COALESCE(new.title, ''),
        new.content_text
    );
END;
|
||||
|
||||
-- Drop a document's index entry once its row has been deleted.
CREATE TRIGGER documents_ad
AFTER DELETE ON documents
BEGIN
    -- FTS5 'delete' command: the special column named after the table carries
    -- the command, followed by the rowid and the values that were indexed.
    -- title uses the same COALESCE(…, '') normalization as at insert time.
    INSERT INTO documents_fts (documents_fts, rowid, title, content_text)
    VALUES (
        'delete',
        old.id,
        COALESCE(old.title, ''),
        old.content_text
    );
END;
|
||||
|
||||
-- Re-index a document after UPDATE, but only when something the index depends
-- on actually changed; metadata-only updates skip the FTS rewrite.
--
-- Every comparison uses IS NOT (NULL-safe inequality). With a plain `!=`, a
-- NULL on either side makes the expression evaluate to NULL, the WHEN clause
-- is not satisfied, and the index would be left stale.
-- NOTE(review): whether documents.content_text can be NULL depends on the
-- table definition in the documents migration - confirm; IS NOT is correct
-- either way.
--
-- The rowid (id) is checked as well so that a changed id moves the index
-- entry to the new rowid even when the text itself is untouched.
CREATE TRIGGER documents_au
AFTER UPDATE ON documents
WHEN old.id IS NOT new.id
    OR old.title IS NOT new.title
    OR old.content_text IS NOT new.content_text
BEGIN
    -- Step 1: remove the entry built from the old values. The values passed
    -- here must match what was indexed, hence the same COALESCE on title.
    INSERT INTO documents_fts (documents_fts, rowid, title, content_text)
    VALUES (
        'delete',
        old.id,
        COALESCE(old.title, ''),
        old.content_text
    );

    -- Step 2: index the new values.
    INSERT INTO documents_fts (rowid, title, content_text)
    VALUES (
        new.id,
        COALESCE(new.title, ''),
        new.content_text
    );
END;
|
||||
|
||||
-- Record that migration 8 has been applied.
-- applied_at is written as epoch milliseconds: strftime('%s', 'now') yields
-- whole seconds, which are then scaled by 1000.
INSERT INTO schema_version (version, applied_at, description)
VALUES (
    8,
    strftime('%s', 'now') * 1000,
    'FTS5 full-text search index with sync triggers'
);
|
||||
Reference in New Issue
Block a user