-- Migration 007: Documents, Document Labels, Document Paths, Dirty Sources, Pending Discussion Fetches
-- Schema version: 7
-- Adds CP3 document storage and queue tables for search pipeline

-- Unified searchable documents (derived from issues/MRs/discussions).
--
-- Identity: each source entity maps to at most one document, enforced by
-- UNIQUE(source_type, source_id). SQLite implements every UNIQUE constraint
-- with an automatic unique index on the same columns, so lookups by
-- (source_type, source_id) are already indexed and no additional explicit
-- index on those columns is created here.
CREATE TABLE documents (
  id INTEGER PRIMARY KEY,
  source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')),
  source_id INTEGER NOT NULL,    -- local DB id in the source table
  project_id INTEGER NOT NULL REFERENCES projects(id),
  author_username TEXT,          -- for discussions: first note author
  label_names TEXT,              -- JSON array (display/debug only)
  created_at INTEGER,            -- ms epoch UTC
  updated_at INTEGER,            -- ms epoch UTC
  url TEXT,
  title TEXT,                    -- null for discussions
  content_text TEXT NOT NULL,    -- canonical text for embedding/search
  content_hash TEXT NOT NULL,    -- SHA-256 for change detection
  labels_hash TEXT NOT NULL DEFAULT '',  -- SHA-256 over sorted labels (write optimization)
  paths_hash TEXT NOT NULL DEFAULT '',   -- SHA-256 over sorted paths (write optimization)
  is_truncated INTEGER NOT NULL DEFAULT 0,
  -- NULL when no reason is recorded; otherwise one of the fixed reason codes
  truncated_reason TEXT CHECK (
    truncated_reason IN (
      'token_limit_middle_drop','single_note_oversized','first_last_oversized',
      'hard_cap_oversized'
    )
    OR truncated_reason IS NULL
  ),
  UNIQUE(source_type, source_id)
);

-- Secondary indexes. (source_type, source_id) is intentionally absent: the
-- UNIQUE constraint above already provides that index.
CREATE INDEX idx_documents_project_updated ON documents(project_id, updated_at);
CREATE INDEX idx_documents_author ON documents(author_username);
CREATE INDEX idx_documents_hash ON documents(content_hash);

-- Label membership, one row per (document, label) pair, supporting fast
-- exact-match label filtering through indexes.
CREATE TABLE document_labels (
    document_id INTEGER NOT NULL,
    label_name  TEXT    NOT NULL,
    PRIMARY KEY (document_id, label_name),
    -- Deleting a document deletes its label rows (when foreign key
    -- enforcement is enabled on the connection).
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
) WITHOUT ROWID;

-- Lookup by label; the primary key only covers lookups that start from a
-- document_id.
CREATE INDEX idx_document_labels_label
    ON document_labels (label_name);

-- File-path membership (DiffNote file paths), one row per (document, path)
-- pair, supporting fast path filtering through indexes.
CREATE TABLE document_paths (
    document_id INTEGER NOT NULL,
    path        TEXT    NOT NULL,
    PRIMARY KEY (document_id, path),
    -- Deleting a document deletes its path rows (when foreign key
    -- enforcement is enabled on the connection).
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
) WITHOUT ROWID;

-- Lookup by path; the primary key only covers lookups that start from a
-- document_id.
CREATE INDEX idx_document_paths_path
    ON document_paths (path);

-- Queue of source entities whose derived document needs regenerating.
-- Each (source_type, source_id) appears at most once; retry bookkeeping
-- (attempt count, last attempt, last error) is kept on the queue row.
-- next_attempt_at is stored as its own indexed column so backoff-aware
-- queries can filter on it directly.
CREATE TABLE dirty_sources (
    source_type     TEXT    NOT NULL
                    CHECK (source_type IN ('issue','merge_request','discussion')),
    source_id       INTEGER NOT NULL,
    queued_at       INTEGER NOT NULL,            -- ms epoch UTC
    attempt_count   INTEGER NOT NULL DEFAULT 0,
    last_attempt_at INTEGER,
    last_error      TEXT,
    next_attempt_at INTEGER,                     -- ms epoch UTC; NULL means ready immediately
    PRIMARY KEY (source_type, source_id)
);

CREATE INDEX idx_dirty_sources_next_attempt
    ON dirty_sources (next_attempt_at);

-- Resumable queue for dependent discussion fetching.
-- One row per (project_id, noteable_type, noteable_iid); retry bookkeeping
-- (attempt count, last attempt, last error) is kept on the queue row.
-- Uses next_attempt_at for index-friendly backoff queries.
CREATE TABLE pending_discussion_fetches (
  project_id INTEGER NOT NULL REFERENCES projects(id),
  -- Restricted to the two documented noteable kinds so a mistyped value is
  -- rejected at write time, matching the CHECK-enforced source_type columns
  -- of the other tables in this migration.
  noteable_type TEXT NOT NULL CHECK (noteable_type IN ('Issue','MergeRequest')),
  noteable_iid INTEGER NOT NULL,
  queued_at INTEGER NOT NULL,             -- ms epoch UTC
  attempt_count INTEGER NOT NULL DEFAULT 0,
  last_attempt_at INTEGER,
  last_error TEXT,
  next_attempt_at INTEGER,                -- ms epoch UTC; NULL means ready immediately
  PRIMARY KEY(project_id, noteable_type, noteable_iid)
);
CREATE INDEX idx_pending_discussions_next_attempt ON pending_discussion_fetches(next_attempt_at);

-- Record that schema version 7 has been applied.
-- strftime('%s', 'now') yields the current Unix time in whole seconds as text;
-- it is cast to an integer and scaled by 1000 to store a millisecond value.
INSERT INTO schema_version (
    version,
    applied_at,
    description
)
VALUES (
    7,
    CAST(strftime('%s', 'now') AS INTEGER) * 1000,
    'Documents, labels, paths, dirty sources, pending discussion fetches'
);