diff --git a/migrations/.gitkeep b/migrations/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/migrations/001_initial.sql b/migrations/001_initial.sql
new file mode 100644
index 0000000..f9a70f1
--- /dev/null
+++ b/migrations/001_initial.sql
@@ -0,0 +1,68 @@
+-- Schema version tracking: each migration in this series appends its own row
+CREATE TABLE IF NOT EXISTS schema_version (
+  version INTEGER PRIMARY KEY,
+  applied_at INTEGER NOT NULL, -- ms epoch UTC (written below as whole seconds * 1000)
+  description TEXT
+);
+
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (1, strftime('%s', 'now') * 1000, 'Initial schema');
+
+-- Projects table (configured targets)
+CREATE TABLE projects (
+  id INTEGER PRIMARY KEY,
+  gitlab_project_id INTEGER UNIQUE NOT NULL,
+  path_with_namespace TEXT NOT NULL,
+  default_branch TEXT,
+  web_url TEXT,
+  created_at INTEGER, -- ms epoch UTC
+  updated_at INTEGER, -- ms epoch UTC
+  raw_payload_id INTEGER REFERENCES raw_payloads(id) -- forward reference: raw_payloads is created further down in this migration
+);
+CREATE INDEX idx_projects_path ON projects(path_with_namespace);
+
+-- Sync tracking for reliability
+CREATE TABLE sync_runs (
+  id INTEGER PRIMARY KEY,
+  started_at INTEGER NOT NULL, -- ms epoch UTC
+  heartbeat_at INTEGER NOT NULL, -- ms epoch UTC
+  finished_at INTEGER, -- ms epoch UTC
+  status TEXT NOT NULL, -- 'running' | 'succeeded' | 'failed'
+  command TEXT NOT NULL, -- 'init' | 'ingest issues' | 'sync' | etc.
+  error TEXT,
+  metrics_json TEXT -- JSON blob of per-run counters/timing
+);
+
+-- Crash-safe single-flight lock (DB-enforced)
+CREATE TABLE app_locks (
+  name TEXT PRIMARY KEY, -- 'sync'; the PRIMARY KEY allows at most one row per lock name
+  owner TEXT NOT NULL, -- random run token (UUIDv4)
+  acquired_at INTEGER NOT NULL, -- ms epoch UTC
+  heartbeat_at INTEGER NOT NULL -- ms epoch UTC
+);
+
+-- Sync cursors for primary resources only
+CREATE TABLE sync_cursors (
+  project_id INTEGER NOT NULL REFERENCES projects(id),
+  resource_type TEXT NOT NULL, -- 'issues' | 'merge_requests'
+  updated_at_cursor INTEGER, -- ms epoch UTC, last fully processed
+  tie_breaker_id INTEGER, -- last fully processed gitlab_id
+  PRIMARY KEY(project_id, resource_type)
+);
+
+-- Raw payload storage (decoupled from entity tables)
+CREATE TABLE raw_payloads (
+  id INTEGER PRIMARY KEY,
+  source TEXT NOT NULL, -- 'gitlab'
+  project_id INTEGER REFERENCES projects(id),
+  resource_type TEXT NOT NULL, -- 'project' | 'issue' | 'mr' | 'note' | 'discussion'
+  gitlab_id TEXT NOT NULL, -- TEXT: discussion IDs are strings
+  fetched_at INTEGER NOT NULL, -- ms epoch UTC
+  content_encoding TEXT NOT NULL DEFAULT 'identity', -- 'identity' | 'gzip'
+  payload_hash TEXT NOT NULL, -- SHA-256 of decoded JSON bytes (pre-compression)
+  payload BLOB NOT NULL -- raw JSON or gzip-compressed JSON
+);
+CREATE INDEX idx_raw_payloads_lookup ON raw_payloads(project_id, resource_type, gitlab_id); -- NOTE(review): same leading columns as the two indexes below; possibly redundant, confirm before dropping
+CREATE INDEX idx_raw_payloads_history ON raw_payloads(project_id, resource_type, gitlab_id, fetched_at);
+CREATE UNIQUE INDEX uq_raw_payloads_dedupe
+  ON raw_payloads(project_id, resource_type, gitlab_id, payload_hash); -- NOTE(review): project_id is nullable and SQLite treats NULLs as distinct in UNIQUE indexes, so rows with NULL project_id are not deduplicated
diff --git a/migrations/002_issues.sql b/migrations/002_issues.sql
new file mode 100644
index 0000000..6699edd
--- /dev/null
+++ b/migrations/002_issues.sql
@@ -0,0 +1,105 @@
+-- Migration 002: Issue Ingestion Tables
+-- Applies on top of 001_initial.sql
+
+-- Issues table
+CREATE TABLE issues (
+  id INTEGER PRIMARY KEY,
+  gitlab_id INTEGER UNIQUE NOT NULL,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  iid INTEGER NOT NULL, -- unique per project (see uq_issues_project_iid)
+  title TEXT,
+  description TEXT,
+  state TEXT NOT NULL CHECK (state IN ('opened', 'closed')),
+  author_username TEXT,
+  created_at INTEGER NOT NULL, -- ms epoch UTC
+  updated_at INTEGER NOT NULL, -- ms epoch UTC
+  last_seen_at INTEGER NOT NULL, -- updated on every upsert
+  discussions_synced_for_updated_at INTEGER, -- watermark for dependent sync
+  web_url TEXT,
+  raw_payload_id INTEGER REFERENCES raw_payloads(id)
+);
+
+CREATE INDEX idx_issues_project_updated ON issues(project_id, updated_at);
+CREATE INDEX idx_issues_author ON issues(author_username);
+CREATE UNIQUE INDEX uq_issues_project_iid ON issues(project_id, iid);
+
+-- Labels table (name-only for CP1)
+CREATE TABLE labels (
+  id INTEGER PRIMARY KEY,
+  gitlab_id INTEGER, -- optional, for future Labels API
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  name TEXT NOT NULL, -- unique per project (see uq_labels_project_name)
+  color TEXT,
+  description TEXT
+);
+
+CREATE UNIQUE INDEX uq_labels_project_name ON labels(project_id, name);
+CREATE INDEX idx_labels_name ON labels(name);
+
+-- Issue-label junction (DELETE before INSERT for stale removal)
+CREATE TABLE issue_labels (
+  issue_id INTEGER NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
+  label_id INTEGER NOT NULL REFERENCES labels(id) ON DELETE CASCADE,
+  PRIMARY KEY(issue_id, label_id)
+);
+
+CREATE INDEX idx_issue_labels_label ON issue_labels(label_id);
+
+-- Discussion threads for issues (MR discussions added in CP2)
+CREATE TABLE discussions (
+  id INTEGER PRIMARY KEY,
+  gitlab_discussion_id TEXT NOT NULL, -- GitLab string ID (e.g., "6a9c1750b37d...")
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
+  merge_request_id INTEGER, -- no FK yet (planned for CP2). NOTE(review): SQLite's ALTER TABLE has no ADD CONSTRAINT, so adding this FK later means rebuilding the table; confirm the CP2 plan
+  noteable_type TEXT NOT NULL CHECK (noteable_type IN ('Issue', 'MergeRequest')),
+  individual_note INTEGER NOT NULL DEFAULT 0, -- 0=threaded, 1=standalone
+  first_note_at INTEGER, -- min(note.created_at) for ordering
+  last_note_at INTEGER, -- max(note.created_at) for "recently active"
+  last_seen_at INTEGER NOT NULL, -- updated on every upsert
+  resolvable INTEGER NOT NULL DEFAULT 0, -- MR discussions can be resolved
+  resolved INTEGER NOT NULL DEFAULT 0,
+  CHECK ( -- exactly one parent id is set, and it must match noteable_type
+    (noteable_type = 'Issue' AND issue_id IS NOT NULL AND merge_request_id IS NULL) OR
+    (noteable_type = 'MergeRequest' AND merge_request_id IS NOT NULL AND issue_id IS NULL)
+  )
+);
+
+CREATE UNIQUE INDEX uq_discussions_project_discussion_id ON discussions(project_id, gitlab_discussion_id);
+CREATE INDEX idx_discussions_issue ON discussions(issue_id);
+CREATE INDEX idx_discussions_mr ON discussions(merge_request_id);
+CREATE INDEX idx_discussions_last_note ON discussions(last_note_at);
+
+-- Notes belong to discussions
+CREATE TABLE notes (
+  id INTEGER PRIMARY KEY,
+  gitlab_id INTEGER UNIQUE NOT NULL,
+  discussion_id INTEGER NOT NULL REFERENCES discussions(id) ON DELETE CASCADE,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  note_type TEXT, -- 'DiscussionNote' | 'DiffNote' | null
+  is_system INTEGER NOT NULL DEFAULT 0, -- 1 for system-generated notes
+  author_username TEXT,
+  body TEXT,
+  created_at INTEGER NOT NULL, -- ms epoch
+  updated_at INTEGER NOT NULL, -- ms epoch
+  last_seen_at INTEGER NOT NULL, -- updated on every upsert
+  position INTEGER, -- 0-indexed array order from API
+  resolvable INTEGER NOT NULL DEFAULT 0,
+  resolved INTEGER NOT NULL DEFAULT 0,
+  resolved_by TEXT,
+  resolved_at INTEGER,
+  -- DiffNote position metadata (populated for MR DiffNotes in CP2)
+  position_old_path TEXT,
+  position_new_path TEXT,
+  position_old_line INTEGER,
+  position_new_line INTEGER,
+  raw_payload_id INTEGER REFERENCES raw_payloads(id)
+);
+
+CREATE INDEX idx_notes_discussion ON notes(discussion_id);
+CREATE INDEX idx_notes_author ON notes(author_username);
+CREATE INDEX idx_notes_system ON notes(is_system);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (2, strftime('%s', 'now') * 1000, 'Issue ingestion tables');
diff --git a/migrations/003_indexes.sql b/migrations/003_indexes.sql
new file mode 100644
index 0000000..d74b8cb
--- /dev/null
+++ b/migrations/003_indexes.sql
@@ -0,0 +1,12 @@
+-- Migration 003: Performance and Orphan Detection Indexes
+-- Adds indexes for efficient orphan detection queries
+
+-- Index for orphan detection: find issues/discussions/notes not seen recently
+-- Used by cleanup routines to identify potentially stale data
+CREATE INDEX IF NOT EXISTS idx_issues_last_seen ON issues(last_seen_at);
+CREATE INDEX IF NOT EXISTS idx_discussions_last_seen ON discussions(last_seen_at);
+CREATE INDEX IF NOT EXISTS idx_notes_last_seen ON notes(last_seen_at);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (3, strftime('%s', 'now') * 1000, 'Performance and orphan detection indexes');
diff --git a/migrations/004_discussions_payload.sql b/migrations/004_discussions_payload.sql
new file mode 100644
index 0000000..a073a55
--- /dev/null
+++ b/migrations/004_discussions_payload.sql
@@ -0,0 +1,8 @@
+-- Migration 004: Add raw_payload_id to discussions
+-- The column was expected by code but missing from initial schema
+
+ALTER TABLE discussions ADD COLUMN raw_payload_id INTEGER REFERENCES raw_payloads(id);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (4, strftime('%s', 'now') * 1000, 'Add raw_payload_id to discussions');
diff --git a/migrations/005_assignees_milestone_duedate.sql b/migrations/005_assignees_milestone_duedate.sql
new file mode 100644
index 0000000..17aad98
--- /dev/null
+++ b/migrations/005_assignees_milestone_duedate.sql
@@ -0,0 +1,43 @@
+-- Migration 005: Add assignees, milestone, and due_date support
+-- Schema version: 5
+
+-- Add new columns to issues table. milestone_id references milestones, created just below; SQLite does not check the parent table when an FK is declared.
+ALTER TABLE issues ADD COLUMN due_date TEXT; -- YYYY-MM-DD format, nullable
+ALTER TABLE issues ADD COLUMN milestone_id INTEGER REFERENCES milestones(id) ON DELETE SET NULL; -- Local milestone ID; nullable, cleared when the milestone row is deleted
+ALTER TABLE issues ADD COLUMN milestone_title TEXT; -- Denormalized for quick display
+
+-- Milestones table (captures key fields for filtering and display)
+CREATE TABLE IF NOT EXISTS milestones (
+  id INTEGER PRIMARY KEY,
+  gitlab_id INTEGER NOT NULL,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  iid INTEGER NOT NULL, -- Project-scoped milestone number
+  title TEXT NOT NULL,
+  description TEXT,
+  state TEXT, -- 'active' or 'closed'
+  due_date TEXT, -- YYYY-MM-DD
+  web_url TEXT,
+  UNIQUE(project_id, gitlab_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_milestones_project ON milestones(project_id);
+CREATE INDEX IF NOT EXISTS idx_milestones_state ON milestones(project_id, state);
+
+-- Issue assignees junction table (issues can have multiple assignees)
+CREATE TABLE IF NOT EXISTS issue_assignees (
+  issue_id INTEGER NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
+  username TEXT NOT NULL,
+  PRIMARY KEY (issue_id, username)
+);
+
+CREATE INDEX IF NOT EXISTS idx_issue_assignees_username ON issue_assignees(username);
+
+-- Index for due_date filtering
+CREATE INDEX IF NOT EXISTS idx_issues_due_date ON issues(due_date);
+
+-- Index for milestone filtering (also covers the milestone_id FK lookups)
+CREATE INDEX IF NOT EXISTS idx_issues_milestone ON issues(milestone_id);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (5, strftime('%s', 'now') * 1000, 'Add assignees, milestone, and due_date support');