Files
gitlore/migrations/001_initial.sql
Taylor Eernisse d15f457a58 feat(db): Add SQLite database migrations for GitLab data model
Implements a comprehensive relational schema for storing GitLab data
with full audit trail and raw payload preservation.

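Migration 001_initial.sql establishes core metadata tables:
- schema_version: Migration tracking (version, applied_at, description)
- projects: Tracked GitLab projects with paths and namespace
- sync_runs: Per-run status, heartbeat, error, and metrics for each sync
- app_locks: Single-flight lock rows with owner token and heartbeat
- sync_cursors: Cursor-based incremental sync state per project
- raw_payloads: Original API responses (identity or gzip), deduplicated by hash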

Migration 002_issues.sql creates the issues data model:
- issues: Core issue data with timestamps, author, state, counts
- labels: Project-specific label definitions with colors/descriptions
- issue_labels: Many-to-many junction for issue-label relationships
- milestones: Project milestones with state and due dates
- discussions: Threaded discussions linked to issues/MRs
- notes: Individual notes within discussions with full metadata
- raw_payloads: Compressed original API responses keyed by entity

Migration 003_indexes.sql adds performance indexes:
- Covering indexes for common query patterns (state, updated_at)
- Composite indexes for filtered queries (project + state)

Migration 004_discussions_payload.sql extends discussions:
- Adds raw_payload column for discussion-level API preservation
- Enables debugging and data recovery from original responses

Migration 005_assignees_milestone_duedate.sql completes the model:
- issue_assignees: Many-to-many for multiple assignees per issue
- Adds milestone_id, due_date columns to issues table
- Indexes for assignee and milestone filtering

Schema supports both incremental sync and full historical queries.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 11:27:51 -05:00

69 lines
2.8 KiB
SQL

-- Schema version tracking: one row per applied schema version.
CREATE TABLE IF NOT EXISTS schema_version (
    version     INTEGER PRIMARY KEY,
    -- ms epoch UTC
    applied_at  INTEGER NOT NULL,
    description TEXT
);

-- Record this migration as version 1. strftime('%s') yields whole seconds,
-- so the stored millisecond timestamp is a multiple of 1000.
INSERT INTO schema_version (version, applied_at, description)
SELECT
    1,
    CAST(strftime('%s', 'now') AS INTEGER) * 1000,
    'Initial schema';
-- Projects table (configured targets).
-- raw_payloads is created later in this file; SQLite accepts a foreign key
-- clause naming a table that does not exist yet at CREATE TABLE time.
CREATE TABLE projects (
    id                  INTEGER PRIMARY KEY,
    gitlab_project_id   INTEGER NOT NULL UNIQUE,
    path_with_namespace TEXT NOT NULL,
    default_branch      TEXT,
    web_url             TEXT,
    -- ms epoch UTC
    created_at          INTEGER,
    -- ms epoch UTC
    updated_at          INTEGER,
    raw_payload_id      INTEGER,
    FOREIGN KEY (raw_payload_id) REFERENCES raw_payloads (id)
);

-- Non-unique index on the namespaced path column.
CREATE INDEX idx_projects_path
    ON projects (path_with_namespace);
-- Sync tracking for reliability: one row per sync invocation.
CREATE TABLE sync_runs (
    id           INTEGER PRIMARY KEY,
    -- ms epoch UTC
    started_at   INTEGER NOT NULL,
    -- ms epoch UTC
    heartbeat_at INTEGER NOT NULL,
    -- ms epoch UTC; nullable
    finished_at  INTEGER,
    -- 'running' | 'succeeded' | 'failed'
    status       TEXT NOT NULL,
    -- 'init' | 'ingest issues' | 'sync' | etc.
    command      TEXT NOT NULL,
    error        TEXT,
    -- JSON blob of per-run counters/timing
    metrics_json TEXT
);
-- Crash-safe single-flight lock (DB-enforced): the primary key on `name`
-- allows at most one row per lock name.
CREATE TABLE app_locks (
    -- 'sync'
    name         TEXT,
    -- random run token (UUIDv4)
    owner        TEXT NOT NULL,
    -- ms epoch UTC
    acquired_at  INTEGER NOT NULL,
    -- ms epoch UTC
    heartbeat_at INTEGER NOT NULL,
    PRIMARY KEY (name)
);
-- Sync cursors for primary resources only: one row per
-- (project, resource type) pair, enforced by the composite primary key.
CREATE TABLE sync_cursors (
    project_id        INTEGER NOT NULL,
    -- 'issues' | 'merge_requests'
    resource_type     TEXT NOT NULL,
    -- ms epoch UTC, last fully processed
    updated_at_cursor INTEGER,
    -- last fully processed gitlab_id
    tie_breaker_id    INTEGER,
    PRIMARY KEY (project_id, resource_type),
    FOREIGN KEY (project_id) REFERENCES projects (id)
);
-- Raw payload storage (decoupled from entity tables).
-- Each row holds one fetched API response body plus the metadata needed to
-- find it again (project, resource type, GitLab id) and to detect duplicates
-- (payload_hash).
CREATE TABLE raw_payloads (
    id               INTEGER PRIMARY KEY,
    source           TEXT NOT NULL,                      -- 'gitlab'
    project_id       INTEGER REFERENCES projects(id),    -- nullable
    resource_type    TEXT NOT NULL,                      -- 'project' | 'issue' | 'mr' | 'note' | 'discussion'
    gitlab_id        TEXT NOT NULL,                      -- TEXT: discussion IDs are strings
    fetched_at       INTEGER NOT NULL,                   -- ms epoch UTC
    content_encoding TEXT NOT NULL DEFAULT 'identity',   -- 'identity' | 'gzip'
    payload_hash     TEXT NOT NULL,                      -- SHA-256 of decoded JSON bytes (pre-compression)
    payload          BLOB NOT NULL                       -- raw JSON or gzip-compressed JSON
);

-- NOTE(review): this index is a strict prefix of idx_raw_payloads_history and
-- of uq_raw_payloads_dedupe, so it looks redundant. It is kept because the
-- name may be referenced elsewhere (later migrations, INDEXED BY) — confirm
-- before dropping.
CREATE INDEX idx_raw_payloads_lookup
    ON raw_payloads(project_id, resource_type, gitlab_id);

-- Same key as the lookup index, extended with fetched_at.
CREATE INDEX idx_raw_payloads_history
    ON raw_payloads(project_id, resource_type, gitlab_id, fetched_at);

-- Reject a second copy of the same payload for the same entity.
CREATE UNIQUE INDEX uq_raw_payloads_dedupe
    ON raw_payloads(project_id, resource_type, gitlab_id, payload_hash);

-- SQLite treats every NULL as distinct inside a UNIQUE index, so the index
-- above never rejects duplicates whose project_id is NULL. This partial
-- index closes that gap for project-less rows only.
CREATE UNIQUE INDEX uq_raw_payloads_dedupe_no_project
    ON raw_payloads(resource_type, gitlab_id, payload_hash)
    WHERE project_id IS NULL;