Implements a comprehensive relational schema for storing GitLab data with full audit trail and raw payload preservation.

Migration 001_initial.sql establishes core metadata tables:
- projects: Tracked GitLab projects with paths and namespace
- sync_watermarks: Cursor-based incremental sync state per project
- schema_migrations: Migration tracking with checksums for integrity

Migration 002_issues.sql creates the issues data model:
- issues: Core issue data with timestamps, author, state, counts
- labels: Project-specific label definitions with colors/descriptions
- issue_labels: Many-to-many junction for issue-label relationships
- milestones: Project milestones with state and due dates
- discussions: Threaded discussions linked to issues/MRs
- notes: Individual notes within discussions with full metadata
- raw_payloads: Compressed original API responses keyed by entity

Migration 003_indexes.sql adds performance indexes:
- Covering indexes for common query patterns (state, updated_at)
- Composite indexes for filtered queries (project + state)

Migration 004_discussions_payload.sql extends discussions:
- Adds raw_payload column for discussion-level API preservation
- Enables debugging and data recovery from original responses

Migration 005_assignees_milestone_duedate.sql completes the model:
- issue_assignees: Many-to-many for multiple assignees per issue
- Adds milestone_id, due_date columns to issues table
- Indexes for assignee and milestone filtering

Schema supports both incremental sync and full historical queries.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
69 lines
2.8 KiB
SQL
69 lines
2.8 KiB
SQL
-- Schema version tracking.
-- Each row records one applied schema version and when it was applied.
CREATE TABLE IF NOT EXISTS schema_version (
    version     INTEGER PRIMARY KEY,
    applied_at  INTEGER NOT NULL,  -- ms epoch UTC
    description TEXT
);

-- Record this file as schema version 1.
-- OR IGNORE keeps the statement re-runnable, in line with IF NOT EXISTS above:
-- a plain INSERT fails on the primary key when the version-1 row already exists.
-- strftime('%s', ...) returns whole seconds as text, so cast it explicitly
-- before scaling to milliseconds (resolution is therefore one second).
INSERT OR IGNORE INTO schema_version (version, applied_at, description)
VALUES (1, CAST(strftime('%s', 'now') AS INTEGER) * 1000, 'Initial schema');
-- Projects table (configured targets)
CREATE TABLE projects (
    id                  INTEGER PRIMARY KEY,      -- local row id; referenced by sync_cursors and raw_payloads
    gitlab_project_id   INTEGER UNIQUE NOT NULL,  -- at most one local row per value
    path_with_namespace TEXT NOT NULL,
    default_branch      TEXT,
    web_url             TEXT,
    created_at          INTEGER,                  -- ms epoch UTC
    updated_at          INTEGER,                  -- ms epoch UTC
    -- Forward reference: raw_payloads is created further down in this file.
    -- SQLite accepts a REFERENCES clause naming a table that does not exist
    -- yet, so statement order does not need to change. Nullable.
    raw_payload_id      INTEGER REFERENCES raw_payloads(id)
);

-- Non-unique index for looking a project up by its path.
CREATE INDEX idx_projects_path ON projects(path_with_namespace);
-- Sync tracking for reliability.
-- One row per command invocation, with start/heartbeat/finish timestamps,
-- outcome, and optional error text and metrics.
CREATE TABLE sync_runs (
    id           INTEGER PRIMARY KEY,
    started_at   INTEGER NOT NULL,  -- ms epoch UTC
    heartbeat_at INTEGER NOT NULL,  -- ms epoch UTC
    finished_at  INTEGER,           -- ms epoch UTC; NULL until a finish time is recorded
    -- NOTE(review): the value sets below are documented only; no CHECK
    -- constraint enforces them, so writers must agree on the spelling.
    status       TEXT NOT NULL,     -- 'running' | 'succeeded' | 'failed'
    command      TEXT NOT NULL,     -- 'init' | 'ingest issues' | 'sync' | etc.
    error        TEXT,
    metrics_json TEXT               -- JSON blob of per-run counters/timing
);
-- Crash-safe single-flight lock (DB-enforced).
-- `name` is the primary key, so the database allows at most one row, and
-- therefore one owner, per lock name at any time.
CREATE TABLE app_locks (
    name         TEXT PRIMARY KEY,  -- 'sync'
    owner        TEXT NOT NULL,     -- random run token (UUIDv4)
    acquired_at  INTEGER NOT NULL,  -- ms epoch UTC
    -- NOTE(review): presumably refreshed by the owner so that a stale lock
    -- left by a crashed run can be detected; confirm against the caller.
    heartbeat_at INTEGER NOT NULL   -- ms epoch UTC
);
-- Sync cursors for primary resources only.
-- The composite primary key allows exactly one cursor row per
-- (project, resource type) pair.
CREATE TABLE sync_cursors (
    project_id        INTEGER NOT NULL REFERENCES projects(id),
    resource_type     TEXT NOT NULL,  -- 'issues' | 'merge_requests'
    -- Both cursor columns are nullable.
    -- NOTE(review): NULL presumably means "nothing processed yet"; confirm.
    updated_at_cursor INTEGER,        -- ms epoch UTC, last fully processed
    tie_breaker_id    INTEGER,        -- last fully processed gitlab_id
    PRIMARY KEY (project_id, resource_type)
);
-- Raw payload storage (decoupled from entity tables).
-- Entity tables point here (e.g. projects.raw_payload_id) rather than
-- embedding the original API response themselves.
CREATE TABLE raw_payloads (
    id               INTEGER PRIMARY KEY,
    source           TEXT NOT NULL,                     -- 'gitlab'
    project_id       INTEGER REFERENCES projects(id),   -- nullable
    resource_type    TEXT NOT NULL,                     -- 'project' | 'issue' | 'mr' | 'note' | 'discussion'
    gitlab_id        TEXT NOT NULL,                     -- TEXT: discussion IDs are strings
    fetched_at       INTEGER NOT NULL,                  -- ms epoch UTC
    content_encoding TEXT NOT NULL DEFAULT 'identity',  -- 'identity' | 'gzip'
    payload_hash     TEXT NOT NULL,                     -- SHA-256 of decoded JSON bytes (pre-compression)
    payload          BLOB NOT NULL                      -- raw JSON or gzip-compressed JSON
);

-- Locate the stored payloads for one entity.
-- NOTE(review): these columns are a leading prefix of both indexes below, so
-- either of those can serve the same lookups. Kept because the index name may
-- be referenced outside this file; confirm before dropping it.
CREATE INDEX idx_raw_payloads_lookup
    ON raw_payloads(project_id, resource_type, gitlab_id);

-- Same key plus fetched_at, so one entity's payloads can be read in fetch order.
CREATE INDEX idx_raw_payloads_history
    ON raw_payloads(project_id, resource_type, gitlab_id, fetched_at);

-- Reject a second copy of identical content (same hash) for the same entity.
-- NOTE(review): project_id is nullable, and SQLite treats NULLs as distinct in
-- UNIQUE indexes, so rows with a NULL project_id are never deduplicated here.
-- Confirm whether such rows occur and whether that is acceptable.
CREATE UNIQUE INDEX uq_raw_payloads_dedupe
    ON raw_payloads(project_id, resource_type, gitlab_id, payload_hash);
|