From d15f457a58b517eb6ec62bcb66a877a00c671b41 Mon Sep 17 00:00:00 2001
From: Taylor Eernisse
Date: Mon, 26 Jan 2026 11:27:51 -0500
Subject: [PATCH] feat(db): Add SQLite database migrations for GitLab data model

Implements a comprehensive relational schema for storing GitLab data
with full audit trail and raw payload preservation.

Migration 001_initial.sql establishes core metadata tables:
- schema_version: Applied-migration tracking (version, timestamp, description)
- projects: Tracked GitLab projects with paths and namespace
- sync_runs: Per-run status, heartbeat, error, and metrics
- app_locks: DB-enforced single-flight lock with owner token and heartbeat
- sync_cursors: Cursor-based incremental sync state per project and resource type
- raw_payloads: Original API responses (identity or gzip) keyed by entity

Migration 002_issues.sql creates the issues data model:
- issues: Core issue data with timestamps, author, state, sync watermark
- labels: Project-specific label definitions with colors/descriptions
- issue_labels: Many-to-many junction for issue-label relationships
- discussions: Threaded discussions linked to issues/MRs
- notes: Individual notes within discussions with full metadata

Migration 003_indexes.sql adds orphan-detection indexes:
- last_seen_at indexes on issues, discussions, and notes
- Used by cleanup routines to find rows not seen recently

Migration 004_discussions_payload.sql extends discussions:
- Adds raw_payload_id column for discussion-level API preservation
- Enables debugging and data recovery from original responses

Migration 005_assignees_milestone_duedate.sql completes the model:
- milestones: Project milestones with state and due dates
- issue_assignees: Many-to-many for multiple assignees per issue
- Adds due_date, milestone_id, milestone_title columns to issues table
- Indexes for assignee, milestone, and due-date filtering

Schema supports both incremental sync and full historical queries.
Co-Authored-By: Claude Opus 4.5
---
 migrations/.gitkeep                           |   0
 migrations/001_initial.sql                    |  68 ++++++++++++
 migrations/002_issues.sql                     | 105 ++++++++++++++++++
 migrations/003_indexes.sql                    |  12 ++
 migrations/004_discussions_payload.sql        |   8 ++
 .../005_assignees_milestone_duedate.sql       |  43 +++++++
 6 files changed, 236 insertions(+)
 create mode 100644 migrations/.gitkeep
 create mode 100644 migrations/001_initial.sql
 create mode 100644 migrations/002_issues.sql
 create mode 100644 migrations/003_indexes.sql
 create mode 100644 migrations/004_discussions_payload.sql
 create mode 100644 migrations/005_assignees_milestone_duedate.sql

diff --git a/migrations/.gitkeep b/migrations/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/migrations/001_initial.sql b/migrations/001_initial.sql
new file mode 100644
index 0000000..f9a70f1
--- /dev/null
+++ b/migrations/001_initial.sql
@@ -0,0 +1,68 @@
+-- Schema version tracking: one row is inserted per applied migration
+CREATE TABLE IF NOT EXISTS schema_version (
+  version INTEGER PRIMARY KEY, -- migration number; PK makes re-inserting the same version fail
+  applied_at INTEGER NOT NULL, -- ms epoch UTC
+  description TEXT
+);
+
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (1, strftime('%s', 'now') * 1000, 'Initial schema'); -- epoch seconds * 1000, so whole-second precision
+
+-- Projects table (configured targets)
+CREATE TABLE projects (
+  id INTEGER PRIMARY KEY, -- local surrogate key referenced by the other tables
+  gitlab_project_id INTEGER UNIQUE NOT NULL,
+  path_with_namespace TEXT NOT NULL,
+  default_branch TEXT,
+  web_url TEXT,
+  created_at INTEGER, -- ms epoch UTC
+  updated_at INTEGER, -- ms epoch UTC
+  raw_payload_id INTEGER REFERENCES raw_payloads(id) -- forward reference: raw_payloads is created later in this file
+);
+CREATE INDEX idx_projects_path ON projects(path_with_namespace);
+
+-- Sync tracking for reliability
+CREATE TABLE sync_runs (
+  id INTEGER PRIMARY KEY,
+  started_at INTEGER NOT NULL, -- ms epoch UTC
+  heartbeat_at INTEGER NOT NULL, -- ms epoch UTC
+  finished_at INTEGER, -- ms epoch UTC
+  status TEXT NOT NULL, -- 'running' | 'succeeded' | 'failed' (documented only; no CHECK constraint)
+  command TEXT NOT NULL, -- 'init' | 'ingest issues' | 'sync' | etc.
+  error TEXT,
+  metrics_json TEXT -- JSON blob of per-run counters/timing
+);
+
+-- Crash-safe single-flight lock (DB-enforced)
+CREATE TABLE app_locks (
+  name TEXT PRIMARY KEY, -- 'sync'; the PK allows at most one row per lock name
+  owner TEXT NOT NULL, -- random run token (UUIDv4)
+  acquired_at INTEGER NOT NULL, -- ms epoch UTC
+  heartbeat_at INTEGER NOT NULL -- ms epoch UTC
+);
+
+-- Sync cursors for primary resources only
+CREATE TABLE sync_cursors (
+  project_id INTEGER NOT NULL REFERENCES projects(id), -- NOTE(review): no ON DELETE action, unlike the CASCADE FKs in 002 - confirm intended
+  resource_type TEXT NOT NULL, -- 'issues' | 'merge_requests'
+  updated_at_cursor INTEGER, -- ms epoch UTC, last fully processed
+  tie_breaker_id INTEGER, -- last fully processed gitlab_id
+  PRIMARY KEY(project_id, resource_type)
+);
+
+-- Raw payload storage (decoupled from entity tables)
+CREATE TABLE raw_payloads (
+  id INTEGER PRIMARY KEY,
+  source TEXT NOT NULL, -- 'gitlab'
+  project_id INTEGER REFERENCES projects(id), -- nullable; NOTE(review): no ON DELETE action - confirm intended
+  resource_type TEXT NOT NULL, -- 'project' | 'issue' | 'mr' | 'note' | 'discussion'
+  gitlab_id TEXT NOT NULL, -- TEXT: discussion IDs are strings
+  fetched_at INTEGER NOT NULL, -- ms epoch UTC
+  content_encoding TEXT NOT NULL DEFAULT 'identity', -- 'identity' | 'gzip'
+  payload_hash TEXT NOT NULL, -- SHA-256 of decoded JSON bytes (pre-compression)
+  payload BLOB NOT NULL -- raw JSON or gzip-compressed JSON
+);
+CREATE INDEX idx_raw_payloads_lookup ON raw_payloads(project_id, resource_type, gitlab_id); -- NOTE(review): same columns as the leading columns of idx_raw_payloads_history
+CREATE INDEX idx_raw_payloads_history ON raw_payloads(project_id, resource_type, gitlab_id, fetched_at);
+CREATE UNIQUE INDEX uq_raw_payloads_dedupe
+  ON raw_payloads(project_id, resource_type, gitlab_id, payload_hash); -- NOTE(review): project_id is nullable and SQLite treats NULLs as distinct in UNIQUE indexes - confirm dedupe for NULL-project rows
diff --git a/migrations/002_issues.sql b/migrations/002_issues.sql
new file mode 100644
index 0000000..6699edd
--- /dev/null
+++ b/migrations/002_issues.sql
@@ -0,0 +1,105 @@
+-- Migration 002: Issue Ingestion Tables
+-- Applies on top of 001_initial.sql
+
+-- Issues table
+CREATE TABLE issues (
+  id INTEGER PRIMARY KEY, -- local surrogate key
+  gitlab_id INTEGER UNIQUE NOT NULL,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  iid INTEGER NOT NULL, -- unique per project via uq_issues_project_iid
+  title TEXT,
+  description TEXT,
+  state TEXT NOT NULL CHECK (state IN ('opened', 'closed')),
+  author_username TEXT,
+  created_at INTEGER NOT NULL, -- ms epoch UTC
+  updated_at INTEGER NOT NULL, -- ms epoch UTC
+  last_seen_at INTEGER NOT NULL, -- updated on every upsert
+  discussions_synced_for_updated_at INTEGER, -- watermark for dependent sync
+  web_url TEXT,
+  raw_payload_id INTEGER REFERENCES raw_payloads(id)
+);
+
+CREATE INDEX idx_issues_project_updated ON issues(project_id, updated_at);
+CREATE INDEX idx_issues_author ON issues(author_username);
+CREATE UNIQUE INDEX uq_issues_project_iid ON issues(project_id, iid);
+
+-- Labels table (name-only for CP1)
+CREATE TABLE labels (
+  id INTEGER PRIMARY KEY,
+  gitlab_id INTEGER, -- optional, for future Labels API
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  name TEXT NOT NULL, -- unique per project via uq_labels_project_name
+  color TEXT,
+  description TEXT
+);
+
+CREATE UNIQUE INDEX uq_labels_project_name ON labels(project_id, name);
+CREATE INDEX idx_labels_name ON labels(name);
+
+-- Issue-label junction (DELETE before INSERT for stale removal)
+CREATE TABLE issue_labels (
+  issue_id INTEGER NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
+  label_id INTEGER NOT NULL REFERENCES labels(id) ON DELETE CASCADE,
+  PRIMARY KEY(issue_id, label_id)
+);
+
+CREATE INDEX idx_issue_labels_label ON issue_labels(label_id); -- reverse lookup; the PK already leads with issue_id
+
+-- Discussion threads for issues (MR discussions added in CP2)
+CREATE TABLE discussions (
+  id INTEGER PRIMARY KEY,
+  gitlab_discussion_id TEXT NOT NULL, -- GitLab string ID (e.g., "6a9c1750b37d...")
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  issue_id INTEGER REFERENCES issues(id) ON DELETE CASCADE,
+  merge_request_id INTEGER, -- FK added in CP2 via ALTER TABLE (NOTE(review): SQLite ALTER TABLE cannot add a constraint to an existing column - confirm plan)
+  noteable_type TEXT NOT NULL CHECK (noteable_type IN ('Issue', 'MergeRequest')),
+  individual_note INTEGER NOT NULL DEFAULT 0, -- 0=threaded, 1=standalone
+  first_note_at INTEGER, -- min(note.created_at) for ordering
+  last_note_at INTEGER, -- max(note.created_at) for "recently active"
+  last_seen_at INTEGER NOT NULL, -- updated on every upsert
+  resolvable INTEGER NOT NULL DEFAULT 0, -- MR discussions can be resolved
+  resolved INTEGER NOT NULL DEFAULT 0,
+  CHECK (
+    (noteable_type = 'Issue' AND issue_id IS NOT NULL AND merge_request_id IS NULL) OR
+    (noteable_type = 'MergeRequest' AND merge_request_id IS NOT NULL AND issue_id IS NULL)
+  ) -- exactly one parent id is set, and it must match noteable_type
+);
+
+CREATE UNIQUE INDEX uq_discussions_project_discussion_id ON discussions(project_id, gitlab_discussion_id);
+CREATE INDEX idx_discussions_issue ON discussions(issue_id);
+CREATE INDEX idx_discussions_mr ON discussions(merge_request_id);
+CREATE INDEX idx_discussions_last_note ON discussions(last_note_at);
+
+-- Notes belong to discussions
+CREATE TABLE notes (
+  id INTEGER PRIMARY KEY,
+  gitlab_id INTEGER UNIQUE NOT NULL,
+  discussion_id INTEGER NOT NULL REFERENCES discussions(id) ON DELETE CASCADE,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  note_type TEXT, -- 'DiscussionNote' | 'DiffNote' | null
+  is_system INTEGER NOT NULL DEFAULT 0, -- 1 for system-generated notes
+  author_username TEXT,
+  body TEXT,
+  created_at INTEGER NOT NULL, -- ms epoch
+  updated_at INTEGER NOT NULL, -- ms epoch
+  last_seen_at INTEGER NOT NULL, -- updated on every upsert
+  position INTEGER, -- 0-indexed array order from API
+  resolvable INTEGER NOT NULL DEFAULT 0,
+  resolved INTEGER NOT NULL DEFAULT 0,
+  resolved_by TEXT,
+  resolved_at INTEGER, -- NOTE(review): presumably ms epoch like the other *_at columns - confirm
+  -- DiffNote position metadata (populated for MR DiffNotes in CP2)
+  position_old_path TEXT,
+  position_new_path TEXT,
+  position_old_line INTEGER,
+  position_new_line INTEGER,
+  raw_payload_id INTEGER REFERENCES raw_payloads(id)
+);
+
+CREATE INDEX idx_notes_discussion ON notes(discussion_id);
+CREATE INDEX idx_notes_author ON notes(author_username);
+CREATE INDEX idx_notes_system ON notes(is_system);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (2, strftime('%s', 'now') * 1000, 'Issue ingestion tables'); -- epoch seconds * 1000, so whole-second precision
diff --git a/migrations/003_indexes.sql b/migrations/003_indexes.sql
new file mode 100644
index 0000000..d74b8cb
--- /dev/null
+++ b/migrations/003_indexes.sql
@@ -0,0 +1,12 @@
+-- Migration 003: Performance and Orphan Detection Indexes
+-- Adds indexes for efficient orphan detection queries
+
+-- Index for orphan detection: find issues/discussions/notes not seen recently
+-- Used by cleanup routines to identify potentially stale data
+CREATE INDEX IF NOT EXISTS idx_issues_last_seen ON issues(last_seen_at);
+CREATE INDEX IF NOT EXISTS idx_discussions_last_seen ON discussions(last_seen_at);
+CREATE INDEX IF NOT EXISTS idx_notes_last_seen ON notes(last_seen_at);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (3, strftime('%s', 'now') * 1000, 'Performance and orphan detection indexes');
diff --git a/migrations/004_discussions_payload.sql b/migrations/004_discussions_payload.sql
new file mode 100644
index 0000000..a073a55
--- /dev/null
+++ b/migrations/004_discussions_payload.sql
@@ -0,0 +1,8 @@
+-- Migration 004: Add raw_payload_id to discussions
+-- The column was expected by code but missing from initial schema
+
+ALTER TABLE discussions ADD COLUMN raw_payload_id INTEGER REFERENCES raw_payloads(id); -- nullable; existing rows get NULL
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (4, strftime('%s', 'now') * 1000, 'Add raw_payload_id to discussions');
diff --git a/migrations/005_assignees_milestone_duedate.sql b/migrations/005_assignees_milestone_duedate.sql
new file mode 100644
index 0000000..17aad98
--- /dev/null
+++ b/migrations/005_assignees_milestone_duedate.sql
@@ -0,0 +1,43 @@
+-- Migration 005: Add assignees, milestone, and due_date support
+-- Schema version: 5
+
+-- Add new columns to issues table (all nullable; existing rows get NULL)
+ALTER TABLE issues ADD COLUMN due_date TEXT; -- YYYY-MM-DD format, nullable
+ALTER TABLE issues ADD COLUMN milestone_id INTEGER REFERENCES milestones(id) ON DELETE SET NULL; -- Local milestones.id (table created below); nulled when that milestone row is deleted
+ALTER TABLE issues ADD COLUMN milestone_title TEXT; -- Denormalized for quick display; not maintained by the FK
+
+-- Milestones table (captures key fields for filtering and display)
+CREATE TABLE IF NOT EXISTS milestones (
+  id INTEGER PRIMARY KEY, -- local surrogate key referenced by issues.milestone_id
+  gitlab_id INTEGER NOT NULL,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  iid INTEGER NOT NULL, -- Project-scoped milestone number
+  title TEXT NOT NULL,
+  description TEXT,
+  state TEXT, -- 'active' or 'closed'
+  due_date TEXT, -- YYYY-MM-DD
+  web_url TEXT,
+  UNIQUE(project_id, gitlab_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_milestones_project ON milestones(project_id); -- NOTE(review): project_id also leads idx_milestones_state and the UNIQUE(project_id, gitlab_id) index
+CREATE INDEX IF NOT EXISTS idx_milestones_state ON milestones(project_id, state);
+
+-- Issue assignees junction table (issues can have multiple assignees)
+CREATE TABLE IF NOT EXISTS issue_assignees (
+  issue_id INTEGER NOT NULL REFERENCES issues(id) ON DELETE CASCADE,
+  username TEXT NOT NULL,
+  PRIMARY KEY (issue_id, username)
+);
+
+CREATE INDEX IF NOT EXISTS idx_issue_assignees_username ON issue_assignees(username); -- reverse lookup; the PK already leads with issue_id
+
+-- Index for due_date filtering
+CREATE INDEX IF NOT EXISTS idx_issues_due_date ON issues(due_date);
+
+-- Index for milestone filtering
+CREATE INDEX IF NOT EXISTS idx_issues_milestone ON issues(milestone_id);
+
+-- Update schema version
+INSERT INTO schema_version (version, applied_at, description)
+VALUES (5, strftime('%s', 'now') * 1000, 'Add assignees, milestone, and due_date support');