-- Migration 007 (schema version 7)
-- Introduces CP3 document storage and the queue tables used by the search pipeline:
--   documents, document_labels, document_paths,
--   dirty_sources, pending_discussion_fetches

-- ---------------------------------------------------------------------------
-- documents
-- One unified, searchable row per issue / merge request / discussion,
-- derived from the corresponding source record.
-- ---------------------------------------------------------------------------
CREATE TABLE documents (
    id INTEGER PRIMARY KEY,
    source_type TEXT NOT NULL
        CHECK (source_type IN ('issue','merge_request','discussion')),
    -- Local DB id of the row in the source table.
    source_id INTEGER NOT NULL,
    project_id INTEGER NOT NULL REFERENCES projects(id),
    -- For discussions this is the author of the first note.
    author_username TEXT,
    -- JSON array, kept for display/debugging only.
    label_names TEXT,
    -- Both timestamps are milliseconds since the epoch, UTC.
    created_at INTEGER,
    updated_at INTEGER,
    url TEXT,
    -- NULL for discussions.
    title TEXT,
    -- Canonical text used for embedding and search.
    content_text TEXT NOT NULL,
    -- SHA-256 of the content, used for change detection.
    content_hash TEXT NOT NULL,
    -- SHA-256 over the sorted labels / sorted paths (write optimization).
    labels_hash TEXT NOT NULL DEFAULT '',
    paths_hash TEXT NOT NULL DEFAULT '',
    is_truncated INTEGER NOT NULL DEFAULT 0,
    -- Either NULL or one of the known truncation reasons.
    truncated_reason TEXT
        CHECK (
            truncated_reason IS NULL
            OR truncated_reason IN (
                'token_limit_middle_drop',
                'single_note_oversized',
                'first_last_oversized',
                'hard_cap_oversized'
            )
        ),
    UNIQUE(source_type, source_id)
);

CREATE INDEX idx_documents_project_updated
    ON documents(project_id, updated_at);

CREATE INDEX idx_documents_author
    ON documents(author_username);

-- NOTE(review): same columns as the UNIQUE(source_type, source_id) constraint
-- above, which SQLite normally backs with its own automatic index — confirm
-- whether this explicit index is still needed before touching it.
CREATE INDEX idx_documents_source
    ON documents(source_type, source_id);

CREATE INDEX idx_documents_hash
    ON documents(content_hash);

-- ---------------------------------------------------------------------------
-- document_labels
-- Exact-match label filtering: one row per (document, label).
-- Rows are declared to cascade-delete with their parent document.
-- ---------------------------------------------------------------------------
CREATE TABLE document_labels (
    document_id INTEGER NOT NULL
        REFERENCES documents(id) ON DELETE CASCADE,
    label_name TEXT NOT NULL,
    PRIMARY KEY(document_id, label_name)
) WITHOUT ROWID;

CREATE INDEX idx_document_labels_label
    ON document_labels(label_name);

-- ---------------------------------------------------------------------------
-- document_paths
-- Path filtering (DiffNote file paths): one row per (document, path).
-- Rows are declared to cascade-delete with their parent document.
-- ---------------------------------------------------------------------------
CREATE TABLE document_paths (
    document_id INTEGER NOT NULL
        REFERENCES documents(id) ON DELETE CASCADE,
    path TEXT NOT NULL,
    PRIMARY KEY(document_id, path)
) WITHOUT ROWID;

CREATE INDEX idx_document_paths_path
    ON document_paths(path);

-- ---------------------------------------------------------------------------
-- dirty_sources
-- Queue of sources whose document must be regenerated incrementally,
-- with retry bookkeeping. Backoff is expressed through next_attempt_at so
-- that "what is ready?" can be answered from an index.
-- ---------------------------------------------------------------------------
CREATE TABLE dirty_sources (
    source_type TEXT NOT NULL
        CHECK (source_type IN ('issue','merge_request','discussion')),
    source_id INTEGER NOT NULL,
    -- Milliseconds since the epoch, UTC.
    queued_at INTEGER NOT NULL,
    attempt_count INTEGER NOT NULL DEFAULT 0,
    last_attempt_at INTEGER,
    last_error TEXT,
    -- Milliseconds since the epoch, UTC; NULL means ready immediately.
    next_attempt_at INTEGER,
    PRIMARY KEY(source_type, source_id)
);

CREATE INDEX idx_dirty_sources_next_attempt
    ON dirty_sources(next_attempt_at);

-- ---------------------------------------------------------------------------
-- pending_discussion_fetches
-- Resumable queue for dependent discussion fetching. Like dirty_sources,
-- backoff is expressed through next_attempt_at for index-friendly queries.
-- ---------------------------------------------------------------------------
CREATE TABLE pending_discussion_fetches (
    project_id INTEGER NOT NULL REFERENCES projects(id),
    -- 'Issue' | 'MergeRequest'
    noteable_type TEXT NOT NULL,
    noteable_iid INTEGER NOT NULL,
    -- Milliseconds since the epoch, UTC.
    queued_at INTEGER NOT NULL,
    attempt_count INTEGER NOT NULL DEFAULT 0,
    last_attempt_at INTEGER,
    last_error TEXT,
    -- Milliseconds since the epoch, UTC; NULL means ready immediately.
    next_attempt_at INTEGER,
    PRIMARY KEY(project_id, noteable_type, noteable_iid)
);

CREATE INDEX idx_pending_discussions_next_attempt
    ON pending_discussion_fetches(next_attempt_at);

-- ---------------------------------------------------------------------------
-- Record this migration. strftime('%s', 'now') yields Unix seconds, so the
-- value is scaled by 1000 to match the millisecond timestamps used above.
-- ---------------------------------------------------------------------------
INSERT INTO schema_version (version, applied_at, description)
VALUES (
    7,
    strftime('%s', 'now') * 1000,
    'Documents, labels, paths, dirty sources, pending discussion fetches'
);