feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache
- Migration 022: composite indexes for notes queries + author_id column
- Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers
- Migration 025: backfill existing non-system notes into dirty queue
- Add lore notes CLI command with 17 filter options (author, path, resolution, etc.)
- Support table/json/jsonl/csv output formats with field selection
- Wire note dirty tracking through discussion and MR discussion ingestion
- Fix test_migration_024_preserves_existing_data off-by-one (tested wrong migration)
- Fix upsert_document_inner returning false for label/path-only changes
This commit is contained in:
153
migrations/024_note_documents.sql
Normal file
153
migrations/024_note_documents.sql
Normal file
@@ -0,0 +1,153 @@
|
||||
-- Migration 024: Add 'note' source_type to documents and dirty_sources
|
||||
-- SQLite does not support ALTER CONSTRAINT, so we use the table-rebuild pattern.
|
||||
|
||||
-- ============================================================
|
||||
-- 1. Rebuild dirty_sources with updated CHECK constraint
|
||||
-- ============================================================
|
||||
|
||||
-- Replacement for dirty_sources: same columns and primary key, but the
-- source_type CHECK now also admits 'note'.
CREATE TABLE dirty_sources_new (
  source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
  source_id INTEGER NOT NULL,
  queued_at INTEGER NOT NULL,
  attempt_count INTEGER NOT NULL DEFAULT 0,
  last_attempt_at INTEGER,
  last_error TEXT,
  next_attempt_at INTEGER,
  PRIMARY KEY(source_type, source_id)
);

-- Copy rows by column NAME rather than by position, so a difference in the
-- old table's column order cannot silently shift values into the wrong column.
-- NOTE(review): assumes the existing dirty_sources table uses exactly these
-- column names -- confirm against the migration that created it.
INSERT INTO dirty_sources_new (
  source_type,
  source_id,
  queued_at,
  attempt_count,
  last_attempt_at,
  last_error,
  next_attempt_at
)
SELECT
  source_type,
  source_id,
  queued_at,
  attempt_count,
  last_attempt_at,
  last_error,
  next_attempt_at
FROM dirty_sources;

-- Swap the rebuilt table into place under the original name.
DROP TABLE dirty_sources;
ALTER TABLE dirty_sources_new RENAME TO dirty_sources;

-- DROP TABLE also removed the indexes attached to the old table, so the
-- next_attempt_at index has to be created again on the new one.
CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
|
||||
|
||||
-- ============================================================
|
||||
-- 2. Rebuild documents with updated CHECK constraint
|
||||
-- ============================================================
|
||||
|
||||
-- 2a. Backup junction table data
|
||||
-- Snapshot both junction tables into temporary tables so their rows are still
-- available after the tables themselves are dropped in step 2c.
CREATE TEMPORARY TABLE _doc_labels_backup AS
  SELECT * FROM document_labels;
CREATE TEMPORARY TABLE _doc_paths_backup AS
  SELECT * FROM document_paths;

-- 2b. Drop all triggers that reference documents
--     (IF EXISTS keeps this step from failing when a trigger is absent.)
DROP TRIGGER IF EXISTS documents_ai;
DROP TRIGGER IF EXISTS documents_ad;
DROP TRIGGER IF EXISTS documents_au;
DROP TRIGGER IF EXISTS documents_embeddings_ad;

-- 2c. Drop junction tables (they have FK references to documents)
DROP TABLE IF EXISTS document_labels;
DROP TABLE IF EXISTS document_paths;
|
||||
|
||||
-- 2d. Create new documents table with 'note' in CHECK constraint
|
||||
-- Replacement for documents: the source_type CHECK now also admits 'note'.
CREATE TABLE documents_new (
  id INTEGER PRIMARY KEY,
  source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
  source_id INTEGER NOT NULL,
  project_id INTEGER NOT NULL REFERENCES projects(id),
  author_username TEXT,
  label_names TEXT,
  created_at INTEGER,
  updated_at INTEGER,
  url TEXT,
  title TEXT,
  content_text TEXT NOT NULL,
  content_hash TEXT NOT NULL,
  labels_hash TEXT NOT NULL DEFAULT '',
  paths_hash TEXT NOT NULL DEFAULT '',
  is_truncated INTEGER NOT NULL DEFAULT 0,
  truncated_reason TEXT CHECK (
    truncated_reason IN (
      'token_limit_middle_drop','single_note_oversized','first_last_oversized',
      'hard_cap_oversized'
    )
    OR truncated_reason IS NULL
  ),
  UNIQUE(source_type, source_id)
);

-- 2e. Copy all existing data
-- Columns are listed by name on both sides so the copy does not depend on the
-- physical column order of the old table. `id` is carried over unchanged, so
-- anything keyed on documents.id keeps pointing at the same row.
-- NOTE(review): assumes the existing documents table uses exactly these
-- column names -- confirm against the earlier migrations.
INSERT INTO documents_new (
  id,
  source_type,
  source_id,
  project_id,
  author_username,
  label_names,
  created_at,
  updated_at,
  url,
  title,
  content_text,
  content_hash,
  labels_hash,
  paths_hash,
  is_truncated,
  truncated_reason
)
SELECT
  id,
  source_type,
  source_id,
  project_id,
  author_username,
  label_names,
  created_at,
  updated_at,
  url,
  title,
  content_text,
  content_hash,
  labels_hash,
  paths_hash,
  is_truncated,
  truncated_reason
FROM documents;

-- 2f. Swap tables
-- NOTE(review): with foreign_keys enabled, DROP TABLE performs an implicit
-- DELETE that can run ON DELETE actions in any table still referencing
-- documents(id). Only document_labels/document_paths are handled by this
-- migration -- confirm no other child table exists, or that the migration
-- runner disables foreign keys.
DROP TABLE documents;
ALTER TABLE documents_new RENAME TO documents;

-- 2g. Recreate all indexes on documents
CREATE INDEX idx_documents_project_updated ON documents(project_id, updated_at);
CREATE INDEX idx_documents_author ON documents(author_username);
-- NOTE(review): UNIQUE(source_type, source_id) already yields an automatic
-- index on these two columns, so this named index looks redundant; it is kept
-- because this step recreates the indexes as they were.
CREATE INDEX idx_documents_source ON documents(source_type, source_id);
CREATE INDEX idx_documents_hash ON documents(content_hash);
|
||||
|
||||
-- 2h. Recreate junction tables
|
||||
-- Label membership per document; rows disappear with their parent document.
CREATE TABLE document_labels (
  document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  label_name TEXT NOT NULL,
  PRIMARY KEY(document_id, label_name)
) WITHOUT ROWID;
-- The primary key leads with document_id; this index serves lookups by label.
CREATE INDEX idx_document_labels_label ON document_labels(label_name);

-- File paths per document; rows disappear with their parent document.
CREATE TABLE document_paths (
  document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  path TEXT NOT NULL,
  PRIMARY KEY(document_id, path)
) WITHOUT ROWID;
-- The primary key leads with document_id; this index serves lookups by path.
CREATE INDEX idx_document_paths_path ON document_paths(path);

-- 2i. Restore junction table data from backups
-- Restore by column name so the result does not depend on the column order the
-- backup tables inherited from the pre-migration tables.
-- NOTE(review): assumes the backed-up tables used these same column names --
-- confirm against the migration that originally created them.
INSERT INTO document_labels (document_id, label_name)
SELECT document_id, label_name
FROM _doc_labels_backup;

INSERT INTO document_paths (document_id, path)
SELECT document_id, path
FROM _doc_paths_backup;
|
||||
|
||||
-- 2j. Recreate FTS triggers (from migration 008)
|
||||
-- After a row is inserted into documents, add a matching row to documents_fts
-- keyed by the document id. A NULL title is stored as the empty string.
CREATE TRIGGER documents_ai
AFTER INSERT ON documents
BEGIN
  INSERT INTO documents_fts (rowid, title, content_text)
  VALUES (NEW.id, COALESCE(NEW.title, ''), NEW.content_text);
END;
|
||||
|
||||
-- After a row is deleted from documents, send the FTS 'delete' command to
-- documents_fts, passing the removed row's id and its old indexed values
-- (NULL title mapped to '' exactly as on insert).
CREATE TRIGGER documents_ad
AFTER DELETE ON documents
BEGIN
  INSERT INTO documents_fts (documents_fts, rowid, title, content_text)
  VALUES ('delete', OLD.id, COALESCE(OLD.title, ''), OLD.content_text);
END;
|
||||
|
||||
-- After an update to documents, refresh the documents_fts entry -- but only
-- when an indexed column actually changed. `IS NOT` is used for title because
-- it may be NULL; content_text is declared NOT NULL, so a plain inequality
-- is sufficient there.
CREATE TRIGGER documents_au
AFTER UPDATE ON documents
WHEN OLD.title IS NOT NEW.title OR OLD.content_text <> NEW.content_text
BEGIN
  -- Remove the entry built from the old values first ...
  INSERT INTO documents_fts (documents_fts, rowid, title, content_text)
  VALUES ('delete', OLD.id, COALESCE(OLD.title, ''), OLD.content_text);
  -- ... then add the entry for the new values.
  INSERT INTO documents_fts (rowid, title, content_text)
  VALUES (NEW.id, COALESCE(NEW.title, ''), NEW.content_text);
END;
|
||||
|
||||
-- 2k. Recreate embeddings cleanup trigger (from migration 009)
|
||||
-- After a document is deleted, remove every embeddings row whose rowid falls
-- in the half-open range [id * 1000, (id + 1) * 1000).
-- NOTE(review): presumably each document owns a block of 1000 embedding
-- rowids -- confirm against the migration that defines embeddings.
CREATE TRIGGER documents_embeddings_ad
AFTER DELETE ON documents
BEGIN
  DELETE FROM embeddings
  WHERE rowid >= OLD.id * 1000
    AND rowid < (OLD.id + 1) * 1000;
END;
|
||||
|
||||
-- 2l. Rebuild FTS index to ensure consistency after table swap
|
||||
-- Issue the FTS 'rebuild' command on documents_fts (the special form where the
-- column named after the table carries the command).
INSERT INTO documents_fts (documents_fts)
VALUES ('rebuild');
|
||||
|
||||
-- ============================================================
|
||||
-- 3. Defense triggers: clean up documents when notes are
|
||||
-- deleted or flipped to system notes
|
||||
-- ============================================================
|
||||
|
||||
-- When a non-system note (is_system = 0) is deleted, delete the documents row
-- derived from it, matched by source_type = 'note' and the note's id.
CREATE TRIGGER notes_ad_cleanup
AFTER DELETE ON notes
WHEN OLD.is_system = 0
BEGIN
  DELETE FROM documents
  WHERE source_type = 'note'
    AND source_id = OLD.id;
END;
|
||||
|
||||
-- When a note's is_system flag is updated from 0 to 1, delete the documents
-- row that was derived from it. Updates that leave the flag alone, or move it
-- in the other direction, do nothing here.
CREATE TRIGGER notes_au_system_cleanup
AFTER UPDATE OF is_system ON notes
WHEN OLD.is_system = 0 AND NEW.is_system = 1
BEGIN
  DELETE FROM documents
  WHERE source_type = 'note'
    AND source_id = OLD.id;
END;
|
||||
|
||||
-- ============================================================
|
||||
-- 4. Drop temp backup tables
|
||||
-- ============================================================
|
||||
|
||||
-- The junction-table snapshots are no longer needed once their rows have been
-- restored; IF EXISTS keeps this cleanup from failing if one is already gone.
DROP TABLE IF EXISTS _doc_paths_backup;
DROP TABLE IF EXISTS _doc_labels_backup;
|
||||
Reference in New Issue
Block a user