feat(db): Add SQLite database migrations for GitLab data model

Implements a comprehensive relational schema for storing GitLab data
with full audit trail and raw payload preservation.

Migration 001_initial.sql establishes core metadata tables:
- projects: Tracked GitLab projects with paths and namespace
- sync_cursors: Cursor-based incremental sync state per project and resource type
- schema_version: Migration tracking (version, applied_at, description)

Migration 002_issues.sql creates the issues data model:
- issues: Core issue data with timestamps, author, state, counts
- labels: Project-specific label definitions with colors/descriptions
- issue_labels: Many-to-many junction for issue-label relationships
- milestones: Project milestones with state and due dates (the table itself is created in 005)
- discussions: Threaded discussions linked to issues/MRs
- notes: Individual notes within discussions with full metadata
- raw_payloads: Original API responses, optionally gzip-compressed, keyed by entity (the table itself is created in 001)

Migration 003_indexes.sql adds performance indexes:
- Indexes on last_seen_at for issues, discussions, and notes
- Supports orphan/stale-row detection by cleanup routines

Migration 004_discussions_payload.sql extends discussions:
- Adds raw_payload_id column (reference to raw_payloads) for discussion-level API preservation
- Enables debugging and data recovery from original responses

Migration 005_assignees_milestone_duedate.sql completes the model:
- issue_assignees: Many-to-many for multiple assignees per issue
- Adds milestone_id, due_date columns to issues table
- Indexes for assignee and milestone filtering

Schema supports both incremental sync and full historical queries.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-26 11:27:51 -05:00
parent 986bc59f6a
commit d15f457a58
6 changed files with 236 additions and 0 deletions

0
migrations/.gitkeep Normal file
View File

View File

@@ -0,0 +1,68 @@
-- Schema version tracking: holds one row per recorded migration version.
CREATE TABLE IF NOT EXISTS schema_version (
    version     INTEGER PRIMARY KEY,
    applied_at  INTEGER NOT NULL,  -- ms epoch UTC
    description TEXT
);

-- Record migration 001.
INSERT INTO schema_version (
    version,
    applied_at,
    description
)
VALUES (
    1,
    strftime('%s', 'now') * 1000,  -- epoch seconds scaled to milliseconds
    'Initial schema'
);
-- Projects table (configured targets)
CREATE TABLE projects (
    id                  INTEGER PRIMARY KEY,
    gitlab_project_id   INTEGER NOT NULL UNIQUE,
    path_with_namespace TEXT NOT NULL,
    default_branch      TEXT,
    web_url             TEXT,
    created_at          INTEGER,  -- ms epoch UTC
    updated_at          INTEGER,  -- ms epoch UTC
    raw_payload_id      INTEGER,
    -- raw_payloads is created further down in this same migration.
    FOREIGN KEY (raw_payload_id) REFERENCES raw_payloads(id)
);

-- Lookup of a project by its full path.
CREATE INDEX idx_projects_path
    ON projects (path_with_namespace);
-- Sync tracking for reliability
CREATE TABLE sync_runs (
    id           INTEGER PRIMARY KEY,
    started_at   INTEGER NOT NULL,  -- ms epoch UTC
    heartbeat_at INTEGER NOT NULL,  -- ms epoch UTC
    finished_at  INTEGER,           -- ms epoch UTC; nullable
    status       TEXT NOT NULL,     -- 'running' | 'succeeded' | 'failed'
    command      TEXT NOT NULL,     -- 'init' | 'ingest issues' | 'sync' | etc.
    error        TEXT,
    metrics_json TEXT               -- JSON blob of per-run counters/timing
);
-- Crash-safe single-flight lock (DB-enforced): the primary key on name
-- allows at most one row per lock name.
CREATE TABLE app_locks (
    name         TEXT PRIMARY KEY,  -- 'sync'
    owner        TEXT NOT NULL,     -- random run token (UUIDv4)
    acquired_at  INTEGER NOT NULL,  -- ms epoch UTC
    heartbeat_at INTEGER NOT NULL   -- ms epoch UTC
);
-- Sync cursors for primary resources only.
-- One row per (project, resource type) pair.
CREATE TABLE sync_cursors (
    project_id        INTEGER NOT NULL,
    resource_type     TEXT NOT NULL,  -- 'issues' | 'merge_requests'
    updated_at_cursor INTEGER,        -- ms epoch UTC, last fully processed
    tie_breaker_id    INTEGER,        -- last fully processed gitlab_id
    PRIMARY KEY (project_id, resource_type),
    FOREIGN KEY (project_id) REFERENCES projects(id)
);
-- Raw payload storage (decoupled from entity tables).
-- Entity tables point at rows here through their raw_payload_id columns.
CREATE TABLE raw_payloads (
    id               INTEGER PRIMARY KEY,
    source           TEXT NOT NULL,                     -- 'gitlab'
    project_id       INTEGER REFERENCES projects(id),   -- nullable
    resource_type    TEXT NOT NULL,                     -- 'project' | 'issue' | 'mr' | 'note' | 'discussion'
    gitlab_id        TEXT NOT NULL,                     -- TEXT: discussion IDs are strings
    fetched_at       INTEGER NOT NULL,                  -- ms epoch UTC
    content_encoding TEXT NOT NULL DEFAULT 'identity',  -- 'identity' | 'gzip'
    payload_hash     TEXT NOT NULL,                     -- SHA-256 of decoded JSON bytes (pre-compression)
    payload          BLOB NOT NULL                      -- raw JSON or gzip-compressed JSON
);

-- Entity lookup and fetch history share one index: a lookup by
-- (project_id, resource_type, gitlab_id) is a leading prefix of this index,
-- so a separate three-column lookup index would only add write and storage cost.
CREATE INDEX idx_raw_payloads_history
    ON raw_payloads (project_id, resource_type, gitlab_id, fetched_at);

-- Dedupe identical payloads for the same entity.
-- NOTE(review): project_id is nullable and SQLite treats NULLs as distinct in
-- UNIQUE indexes, so rows stored with a NULL project_id are not deduplicated.
CREATE UNIQUE INDEX uq_raw_payloads_dedupe
    ON raw_payloads (project_id, resource_type, gitlab_id, payload_hash);

105
migrations/002_issues.sql Normal file
View File

@@ -0,0 +1,105 @@
-- Migration 002: Issue Ingestion Tables
-- Applies on top of 001_initial.sql

-- Issues table
CREATE TABLE issues (
    id                                INTEGER PRIMARY KEY,
    gitlab_id                         INTEGER NOT NULL UNIQUE,
    project_id                        INTEGER NOT NULL,
    iid                               INTEGER NOT NULL,
    title                             TEXT,
    description                       TEXT,
    state                             TEXT NOT NULL CHECK (state IN ('opened', 'closed')),
    author_username                   TEXT,
    created_at                        INTEGER NOT NULL,  -- ms epoch UTC
    updated_at                        INTEGER NOT NULL,  -- ms epoch UTC
    last_seen_at                      INTEGER NOT NULL,  -- updated on every upsert
    discussions_synced_for_updated_at INTEGER,           -- watermark for dependent sync
    web_url                           TEXT,
    raw_payload_id                    INTEGER,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
    FOREIGN KEY (raw_payload_id) REFERENCES raw_payloads(id)
);

CREATE INDEX idx_issues_project_updated
    ON issues (project_id, updated_at);

CREATE INDEX idx_issues_author
    ON issues (author_username);

-- iid is unique within a project.
CREATE UNIQUE INDEX uq_issues_project_iid
    ON issues (project_id, iid);
-- Labels table (name-only for CP1)
CREATE TABLE labels (
    id          INTEGER PRIMARY KEY,
    gitlab_id   INTEGER,           -- optional, for future Labels API
    project_id  INTEGER NOT NULL,
    name        TEXT NOT NULL,
    color       TEXT,
    description TEXT,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

-- A label name is unique within a project.
CREATE UNIQUE INDEX uq_labels_project_name
    ON labels (project_id, name);

CREATE INDEX idx_labels_name
    ON labels (name);
-- Issue-label junction (DELETE before INSERT for stale removal)
CREATE TABLE issue_labels (
    issue_id INTEGER NOT NULL,
    label_id INTEGER NOT NULL,
    PRIMARY KEY (issue_id, label_id),
    FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE,
    FOREIGN KEY (label_id) REFERENCES labels(id) ON DELETE CASCADE
);

-- Index led by label_id; the primary key is led by issue_id.
CREATE INDEX idx_issue_labels_label
    ON issue_labels (label_id);
-- Discussion threads for issues (MR discussions added in CP2)
CREATE TABLE discussions (
    id                   INTEGER PRIMARY KEY,
    gitlab_discussion_id TEXT NOT NULL,               -- GitLab string ID (e.g., "6a9c1750b37d...")
    project_id           INTEGER NOT NULL,
    issue_id             INTEGER,
    -- No foreign key is declared on merge_request_id here; it is planned for CP2.
    -- NOTE(review): SQLite's ALTER TABLE cannot attach a foreign key to an
    -- existing column, so CP2 will likely need a table rebuild -- confirm.
    merge_request_id     INTEGER,
    noteable_type        TEXT NOT NULL CHECK (noteable_type IN ('Issue', 'MergeRequest')),
    individual_note      INTEGER NOT NULL DEFAULT 0,  -- 0=threaded, 1=standalone
    first_note_at        INTEGER,                     -- min(note.created_at) for ordering
    last_note_at         INTEGER,                     -- max(note.created_at) for "recently active"
    last_seen_at         INTEGER NOT NULL,            -- updated on every upsert
    resolvable           INTEGER NOT NULL DEFAULT 0,  -- MR discussions can be resolved
    resolved             INTEGER NOT NULL DEFAULT 0,
    -- Exactly one parent: an 'Issue' row carries only issue_id,
    -- a 'MergeRequest' row carries only merge_request_id.
    CHECK (
        (noteable_type = 'Issue' AND issue_id IS NOT NULL AND merge_request_id IS NULL) OR
        (noteable_type = 'MergeRequest' AND merge_request_id IS NOT NULL AND issue_id IS NULL)
    ),
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
    FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
);

-- A GitLab discussion ID is unique within a project.
CREATE UNIQUE INDEX uq_discussions_project_discussion_id
    ON discussions (project_id, gitlab_discussion_id);

CREATE INDEX idx_discussions_issue
    ON discussions (issue_id);

CREATE INDEX idx_discussions_mr
    ON discussions (merge_request_id);

CREATE INDEX idx_discussions_last_note
    ON discussions (last_note_at);
-- Notes belong to discussions
CREATE TABLE notes (
    id                INTEGER PRIMARY KEY,
    gitlab_id         INTEGER NOT NULL UNIQUE,
    discussion_id     INTEGER NOT NULL,
    project_id        INTEGER NOT NULL,
    note_type         TEXT,                        -- 'DiscussionNote' | 'DiffNote' | null
    is_system         INTEGER NOT NULL DEFAULT 0,  -- 1 for system-generated notes
    author_username   TEXT,
    body              TEXT,
    created_at        INTEGER NOT NULL,            -- ms epoch
    updated_at        INTEGER NOT NULL,            -- ms epoch
    last_seen_at      INTEGER NOT NULL,            -- updated on every upsert
    position          INTEGER,                     -- 0-indexed array order from API
    resolvable        INTEGER NOT NULL DEFAULT 0,
    resolved          INTEGER NOT NULL DEFAULT 0,
    resolved_by       TEXT,
    resolved_at       INTEGER,
    -- DiffNote position metadata (populated for MR DiffNotes in CP2)
    position_old_path TEXT,
    position_new_path TEXT,
    position_old_line INTEGER,
    position_new_line INTEGER,
    raw_payload_id    INTEGER,
    FOREIGN KEY (discussion_id) REFERENCES discussions(id) ON DELETE CASCADE,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
    FOREIGN KEY (raw_payload_id) REFERENCES raw_payloads(id)
);

CREATE INDEX idx_notes_discussion
    ON notes (discussion_id);

CREATE INDEX idx_notes_author
    ON notes (author_username);

CREATE INDEX idx_notes_system
    ON notes (is_system);
-- Record migration 002 in schema_version.
INSERT INTO schema_version (
    version,
    applied_at,
    description
)
VALUES (
    2,
    strftime('%s', 'now') * 1000,  -- epoch seconds scaled to milliseconds
    'Issue ingestion tables'
);

View File

@@ -0,0 +1,12 @@
-- Migration 003: Performance and Orphan Detection Indexes
--
-- Indexes last_seen_at on issues, discussions, and notes so cleanup routines
-- can find rows that have not been seen recently (potentially stale data).
CREATE INDEX IF NOT EXISTS idx_issues_last_seen
    ON issues (last_seen_at);

CREATE INDEX IF NOT EXISTS idx_discussions_last_seen
    ON discussions (last_seen_at);

CREATE INDEX IF NOT EXISTS idx_notes_last_seen
    ON notes (last_seen_at);

-- Record migration 003 in schema_version.
INSERT INTO schema_version (
    version,
    applied_at,
    description
)
VALUES (
    3,
    strftime('%s', 'now') * 1000,  -- epoch seconds scaled to milliseconds
    'Performance and orphan detection indexes'
);

View File

@@ -0,0 +1,8 @@
-- Migration 004: Add raw_payload_id to discussions
-- The column was expected by code but missing from initial schema.
-- It is added as a nullable reference to raw_payloads(id).
ALTER TABLE discussions
    ADD COLUMN raw_payload_id INTEGER REFERENCES raw_payloads(id);

-- Record migration 004 in schema_version.
INSERT INTO schema_version (
    version,
    applied_at,
    description
)
VALUES (
    4,
    strftime('%s', 'now') * 1000,  -- epoch seconds scaled to milliseconds
    'Add raw_payload_id to discussions'
);

View File

@@ -0,0 +1,43 @@
-- Migration 005: Add assignees, milestone, and due_date support
-- Schema version: 5

-- Add new nullable columns to the issues table.
ALTER TABLE issues
    ADD COLUMN due_date TEXT;  -- YYYY-MM-DD format, nullable

-- Local milestone ID, declared as a real foreign key to milestones.id so the
-- relationship is enforced when foreign_keys is ON. The milestones table is
-- created later in this migration. The column keeps its implicit NULL
-- default, which SQLite requires for ADD COLUMN ... REFERENCES when foreign
-- keys are enabled. ON DELETE SET NULL detaches issues from a deleted
-- milestone instead of blocking the delete.
ALTER TABLE issues
    ADD COLUMN milestone_id INTEGER REFERENCES milestones(id) ON DELETE SET NULL;

ALTER TABLE issues
    ADD COLUMN milestone_title TEXT;  -- Denormalized for quick display
-- Milestones table (captures key fields for filtering and display)
CREATE TABLE IF NOT EXISTS milestones (
    id          INTEGER PRIMARY KEY,
    gitlab_id   INTEGER NOT NULL,
    project_id  INTEGER NOT NULL,
    iid         INTEGER NOT NULL,  -- Project-scoped milestone number
    title       TEXT NOT NULL,
    description TEXT,
    state       TEXT,              -- 'active' or 'closed'
    due_date    TEXT,              -- YYYY-MM-DD
    web_url     TEXT,
    UNIQUE (project_id, gitlab_id),
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_milestones_project
    ON milestones (project_id);

CREATE INDEX IF NOT EXISTS idx_milestones_state
    ON milestones (project_id, state);
-- Issue assignees junction table (issues can have multiple assignees).
-- Assignees are stored by username.
CREATE TABLE IF NOT EXISTS issue_assignees (
    issue_id INTEGER NOT NULL,
    username TEXT NOT NULL,
    PRIMARY KEY (issue_id, username),
    FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
);

-- Index led by username; the primary key is led by issue_id.
CREATE INDEX IF NOT EXISTS idx_issue_assignees_username
    ON issue_assignees (username);
-- Index for due_date filtering
CREATE INDEX IF NOT EXISTS idx_issues_due_date
    ON issues (due_date);

-- Index for milestone filtering
CREATE INDEX IF NOT EXISTS idx_issues_milestone
    ON issues (milestone_id);

-- Record migration 005 in schema_version.
INSERT INTO schema_version (
    version,
    applied_at,
    description
)
VALUES (
    5,
    strftime('%s', 'now') * 1000,  -- epoch seconds scaled to milliseconds
    'Add assignees, milestone, and due_date support'
);