Compare commits
5 Commits
3f38b3fda7
...
571c304031
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
571c304031 | ||
|
|
e4ac7020b3 | ||
|
|
c7a7898675 | ||
|
|
5fd1ce6905 | ||
|
|
b67bb8754c |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -1158,7 +1158,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lore"
|
name = "lore"
|
||||||
version = "0.9.1"
|
version = "0.9.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"charmed-lipgloss",
|
"charmed-lipgloss",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lore"
|
name = "lore"
|
||||||
version = "0.9.1"
|
version = "0.9.2"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "Gitlore - Local GitLab data management with semantic search"
|
description = "Gitlore - Local GitLab data management with semantic search"
|
||||||
authors = ["Taylor Eernisse"]
|
authors = ["Taylor Eernisse"]
|
||||||
|
|||||||
1654
api-review.html
1654
api-review.html
File diff suppressed because it is too large
Load Diff
@@ -1,844 +0,0 @@
|
|||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<title>Gitlore Sync Pipeline Explorer</title>
|
|
||||||
<style>
|
|
||||||
:root {
|
|
||||||
--bg: #0d1117;
|
|
||||||
--bg-secondary: #161b22;
|
|
||||||
--bg-tertiary: #1c2129;
|
|
||||||
--border: #30363d;
|
|
||||||
--text: #c9d1d9;
|
|
||||||
--text-dim: #8b949e;
|
|
||||||
--text-bright: #f0f6fc;
|
|
||||||
--cyan: #58a6ff;
|
|
||||||
--green: #3fb950;
|
|
||||||
--amber: #d29922;
|
|
||||||
--red: #f85149;
|
|
||||||
--purple: #bc8cff;
|
|
||||||
--pink: #f778ba;
|
|
||||||
--cyan-dim: rgba(88,166,255,0.15);
|
|
||||||
--green-dim: rgba(63,185,80,0.15);
|
|
||||||
--amber-dim: rgba(210,153,34,0.15);
|
|
||||||
--red-dim: rgba(248,81,73,0.15);
|
|
||||||
--purple-dim: rgba(188,140,255,0.15);
|
|
||||||
}
|
|
||||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
|
||||||
body {
|
|
||||||
font-family: 'SF Mono', 'Cascadia Code', 'Fira Code', 'JetBrains Mono', monospace;
|
|
||||||
background: var(--bg); color: var(--text);
|
|
||||||
display: flex; height: 100vh; overflow: hidden;
|
|
||||||
}
|
|
||||||
.sidebar {
|
|
||||||
width: 220px; min-width: 220px; background: var(--bg-secondary);
|
|
||||||
border-right: 1px solid var(--border); display: flex; flex-direction: column; padding: 16px 0;
|
|
||||||
}
|
|
||||||
.sidebar-title {
|
|
||||||
font-size: 11px; font-weight: 700; text-transform: uppercase;
|
|
||||||
letter-spacing: 1.2px; color: var(--text-dim); padding: 0 16px 12px;
|
|
||||||
}
|
|
||||||
.logo {
|
|
||||||
padding: 0 16px 20px; font-size: 15px; font-weight: 700; color: var(--cyan);
|
|
||||||
display: flex; align-items: center; gap: 8px;
|
|
||||||
}
|
|
||||||
.logo svg { width: 20px; height: 20px; }
|
|
||||||
.nav-item {
|
|
||||||
padding: 10px 16px; cursor: pointer; font-size: 13px; color: var(--text-dim);
|
|
||||||
transition: all 0.15s; border-left: 3px solid transparent;
|
|
||||||
display: flex; align-items: center; gap: 10px;
|
|
||||||
}
|
|
||||||
.nav-item:hover { background: var(--bg-tertiary); color: var(--text); }
|
|
||||||
.nav-item.active { background: var(--cyan-dim); color: var(--cyan); border-left-color: var(--cyan); }
|
|
||||||
.nav-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
|
|
||||||
.main { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
|
|
||||||
.header {
|
|
||||||
padding: 16px 24px; border-bottom: 1px solid var(--border);
|
|
||||||
display: flex; align-items: center; justify-content: space-between;
|
|
||||||
}
|
|
||||||
.header h1 { font-size: 16px; font-weight: 600; color: var(--text-bright); }
|
|
||||||
.header-badge {
|
|
||||||
font-size: 11px; padding: 3px 10px; border-radius: 12px;
|
|
||||||
background: var(--cyan-dim); color: var(--cyan);
|
|
||||||
}
|
|
||||||
.canvas-wrapper { flex: 1; overflow: auto; position: relative; }
|
|
||||||
.canvas { padding: 32px; min-height: 100%; }
|
|
||||||
.flow-container { display: none; }
|
|
||||||
.flow-container.active { display: block; }
|
|
||||||
.phase { margin-bottom: 32px; }
|
|
||||||
.phase-header { display: flex; align-items: center; gap: 12px; margin-bottom: 16px; }
|
|
||||||
.phase-number {
|
|
||||||
width: 28px; height: 28px; border-radius: 50%; display: flex; align-items: center;
|
|
||||||
justify-content: center; font-size: 13px; font-weight: 700; flex-shrink: 0;
|
|
||||||
}
|
|
||||||
.phase-title { font-size: 14px; font-weight: 600; color: var(--text-bright); }
|
|
||||||
.phase-subtitle { font-size: 11px; color: var(--text-dim); margin-left: 4px; font-weight: 400; }
|
|
||||||
.flow-row {
|
|
||||||
display: flex; align-items: stretch; gap: 0; flex-wrap: wrap;
|
|
||||||
margin-left: 14px; padding-left: 26px; border-left: 2px solid var(--border);
|
|
||||||
}
|
|
||||||
.flow-row:last-child { border-left-color: transparent; }
|
|
||||||
.node {
|
|
||||||
position: relative; padding: 12px 16px; border-radius: 8px;
|
|
||||||
border: 1px solid var(--border); background: var(--bg-secondary);
|
|
||||||
font-size: 12px; cursor: pointer; transition: all 0.2s;
|
|
||||||
min-width: 180px; max-width: 260px; margin: 4px 0;
|
|
||||||
}
|
|
||||||
.node:hover {
|
|
||||||
border-color: var(--cyan); transform: translateY(-1px);
|
|
||||||
box-shadow: 0 4px 12px rgba(0,0,0,0.3);
|
|
||||||
}
|
|
||||||
.node.selected {
|
|
||||||
border-color: var(--cyan);
|
|
||||||
box-shadow: 0 0 0 1px var(--cyan), 0 4px 16px rgba(88,166,255,0.15);
|
|
||||||
}
|
|
||||||
.node-title { font-weight: 600; font-size: 12px; margin-bottom: 4px; color: var(--text-bright); }
|
|
||||||
.node-desc { font-size: 11px; color: var(--text-dim); line-height: 1.5; }
|
|
||||||
.node.api { border-left: 3px solid var(--cyan); }
|
|
||||||
.node.transform { border-left: 3px solid var(--purple); }
|
|
||||||
.node.db { border-left: 3px solid var(--green); }
|
|
||||||
.node.decision { border-left: 3px solid var(--amber); }
|
|
||||||
.node.error { border-left: 3px solid var(--red); }
|
|
||||||
.node.queue { border-left: 3px solid var(--pink); }
|
|
||||||
.arrow {
|
|
||||||
display: flex; align-items: center; padding: 0 6px;
|
|
||||||
color: var(--text-dim); font-size: 16px; flex-shrink: 0;
|
|
||||||
}
|
|
||||||
.arrow-down {
|
|
||||||
display: flex; justify-content: center; padding: 4px 0;
|
|
||||||
color: var(--text-dim); font-size: 16px; margin-left: 14px;
|
|
||||||
padding-left: 26px; border-left: 2px solid var(--border);
|
|
||||||
}
|
|
||||||
.branch-container {
|
|
||||||
margin-left: 14px; padding-left: 26px;
|
|
||||||
border-left: 2px solid var(--border); padding-bottom: 8px;
|
|
||||||
}
|
|
||||||
.branch-row { display: flex; gap: 12px; margin: 8px 0; flex-wrap: wrap; }
|
|
||||||
.branch-label {
|
|
||||||
font-size: 11px; font-weight: 600; margin: 8px 0 4px;
|
|
||||||
display: flex; align-items: center; gap: 6px;
|
|
||||||
}
|
|
||||||
.branch-label.success { color: var(--green); }
|
|
||||||
.branch-label.error { color: var(--red); }
|
|
||||||
.branch-label.retry { color: var(--amber); }
|
|
||||||
.diff-badge {
|
|
||||||
display: inline-block; font-size: 10px; padding: 2px 6px;
|
|
||||||
border-radius: 4px; margin-top: 6px; font-weight: 600;
|
|
||||||
}
|
|
||||||
.diff-badge.changed { background: var(--amber-dim); color: var(--amber); }
|
|
||||||
.diff-badge.same { background: var(--green-dim); color: var(--green); }
|
|
||||||
.detail-panel {
|
|
||||||
position: fixed; right: 0; top: 0; bottom: 0; width: 380px;
|
|
||||||
background: var(--bg-secondary); border-left: 1px solid var(--border);
|
|
||||||
transform: translateX(100%); transition: transform 0.25s ease;
|
|
||||||
z-index: 100; display: flex; flex-direction: column; overflow: hidden;
|
|
||||||
}
|
|
||||||
.detail-panel.open { transform: translateX(0); }
|
|
||||||
.detail-header {
|
|
||||||
padding: 16px 20px; border-bottom: 1px solid var(--border);
|
|
||||||
display: flex; align-items: center; justify-content: space-between;
|
|
||||||
}
|
|
||||||
.detail-header h2 { font-size: 14px; font-weight: 600; color: var(--text-bright); }
|
|
||||||
.detail-close {
|
|
||||||
cursor: pointer; color: var(--text-dim); font-size: 18px;
|
|
||||||
background: none; border: none; padding: 4px 8px; border-radius: 4px;
|
|
||||||
}
|
|
||||||
.detail-close:hover { background: var(--bg-tertiary); color: var(--text); }
|
|
||||||
.detail-body { flex: 1; overflow-y: auto; padding: 20px; }
|
|
||||||
.detail-section { margin-bottom: 20px; }
|
|
||||||
.detail-section h3 {
|
|
||||||
font-size: 11px; text-transform: uppercase; letter-spacing: 0.8px;
|
|
||||||
color: var(--text-dim); margin-bottom: 8px;
|
|
||||||
}
|
|
||||||
.detail-section p { font-size: 12px; line-height: 1.7; color: var(--text); }
|
|
||||||
.sql-block {
|
|
||||||
background: var(--bg); border: 1px solid var(--border); border-radius: 6px;
|
|
||||||
padding: 12px; font-size: 11px; line-height: 1.6; color: var(--green);
|
|
||||||
overflow-x: auto; white-space: pre; margin-top: 8px;
|
|
||||||
}
|
|
||||||
.detail-tag {
|
|
||||||
display: inline-block; font-size: 10px; padding: 2px 8px;
|
|
||||||
border-radius: 10px; margin: 2px 4px 2px 0;
|
|
||||||
}
|
|
||||||
.detail-tag.file { background: var(--purple-dim); color: var(--purple); }
|
|
||||||
.detail-tag.type-api { background: var(--cyan-dim); color: var(--cyan); }
|
|
||||||
.detail-tag.type-db { background: var(--green-dim); color: var(--green); }
|
|
||||||
.detail-tag.type-transform { background: var(--purple-dim); color: var(--purple); }
|
|
||||||
.detail-tag.type-decision { background: var(--amber-dim); color: var(--amber); }
|
|
||||||
.detail-tag.type-error { background: var(--red-dim); color: var(--red); }
|
|
||||||
.detail-tag.type-queue { background: rgba(247,120,186,0.15); color: var(--pink); }
|
|
||||||
.watermark-panel { border-top: 1px solid var(--border); background: var(--bg-secondary); }
|
|
||||||
.watermark-toggle {
|
|
||||||
padding: 10px 24px; cursor: pointer; font-size: 12px; color: var(--text-dim);
|
|
||||||
display: flex; align-items: center; gap: 8px; user-select: none;
|
|
||||||
}
|
|
||||||
.watermark-toggle:hover { color: var(--text); }
|
|
||||||
.watermark-toggle .chevron { transition: transform 0.2s; font-size: 10px; }
|
|
||||||
.watermark-toggle .chevron.open { transform: rotate(180deg); }
|
|
||||||
.watermark-content { display: none; padding: 0 24px 16px; max-height: 260px; overflow-y: auto; }
|
|
||||||
.watermark-content.open { display: block; }
|
|
||||||
.wm-table { width: 100%; border-collapse: collapse; font-size: 11px; }
|
|
||||||
.wm-table th {
|
|
||||||
text-align: left; padding: 6px 12px; color: var(--text-dim); font-weight: 600;
|
|
||||||
border-bottom: 1px solid var(--border); font-size: 10px;
|
|
||||||
text-transform: uppercase; letter-spacing: 0.5px;
|
|
||||||
}
|
|
||||||
.wm-table td { padding: 6px 12px; border-bottom: 1px solid var(--border); color: var(--text); }
|
|
||||||
.wm-table td:first-child { color: var(--cyan); font-weight: 600; }
|
|
||||||
.wm-table td:nth-child(2) { color: var(--green); }
|
|
||||||
.overview-pipeline { display: flex; gap: 0; align-items: stretch; margin: 24px 0; flex-wrap: wrap; }
|
|
||||||
.overview-stage {
|
|
||||||
flex: 1; min-width: 200px; background: var(--bg-secondary);
|
|
||||||
border: 1px solid var(--border); border-radius: 10px; padding: 20px;
|
|
||||||
cursor: pointer; transition: all 0.2s;
|
|
||||||
}
|
|
||||||
.overview-stage:hover {
|
|
||||||
border-color: var(--cyan); transform: translateY(-2px);
|
|
||||||
box-shadow: 0 6px 20px rgba(0,0,0,0.3);
|
|
||||||
}
|
|
||||||
.overview-arrow { display: flex; align-items: center; padding: 0 8px; font-size: 20px; color: var(--text-dim); }
|
|
||||||
.stage-num { font-size: 10px; font-weight: 700; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 8px; }
|
|
||||||
.stage-title { font-size: 15px; font-weight: 700; color: var(--text-bright); margin-bottom: 6px; }
|
|
||||||
.stage-desc { font-size: 11px; color: var(--text-dim); line-height: 1.6; }
|
|
||||||
.stage-detail {
|
|
||||||
margin-top: 12px; padding-top: 12px; border-top: 1px solid var(--border);
|
|
||||||
font-size: 11px; color: var(--text-dim); line-height: 1.6;
|
|
||||||
}
|
|
||||||
.stage-detail code {
|
|
||||||
color: var(--amber); background: var(--amber-dim); padding: 1px 5px;
|
|
||||||
border-radius: 3px; font-size: 10px;
|
|
||||||
}
|
|
||||||
.info-box {
|
|
||||||
background: var(--bg-tertiary); border: 1px solid var(--border);
|
|
||||||
border-radius: 8px; padding: 16px; margin: 16px 0; font-size: 12px; line-height: 1.7;
|
|
||||||
}
|
|
||||||
.info-box-title { font-weight: 600; color: var(--cyan); margin-bottom: 6px; display: flex; align-items: center; gap: 6px; }
|
|
||||||
.info-box ul { margin-left: 16px; color: var(--text-dim); }
|
|
||||||
.info-box li { margin: 4px 0; }
|
|
||||||
.info-box code {
|
|
||||||
color: var(--amber); background: var(--amber-dim);
|
|
||||||
padding: 1px 5px; border-radius: 3px; font-size: 11px;
|
|
||||||
}
|
|
||||||
.legend {
|
|
||||||
display: flex; gap: 16px; flex-wrap: wrap; margin-bottom: 24px;
|
|
||||||
padding: 12px 16px; background: var(--bg-secondary);
|
|
||||||
border: 1px solid var(--border); border-radius: 8px;
|
|
||||||
}
|
|
||||||
.legend-item { display: flex; align-items: center; gap: 6px; font-size: 11px; color: var(--text-dim); }
|
|
||||||
.legend-color { width: 12px; height: 3px; border-radius: 2px; }
|
|
||||||
::-webkit-scrollbar { width: 8px; height: 8px; }
|
|
||||||
::-webkit-scrollbar-track { background: transparent; }
|
|
||||||
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
|
|
||||||
::-webkit-scrollbar-thumb:hover { background: var(--text-dim); }
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<div class="sidebar">
|
|
||||||
<div class="logo">
|
|
||||||
<svg viewBox="0 0 20 20" fill="none" stroke="currentColor" stroke-width="1.5">
|
|
||||||
<circle cx="10" cy="10" r="8"/><path d="M10 6v4l3 2"/>
|
|
||||||
</svg>
|
|
||||||
lore sync
|
|
||||||
</div>
|
|
||||||
<div class="sidebar-title">Entity Flows</div>
|
|
||||||
<div class="nav-item active" data-view="overview" onclick="switchView('overview')">
|
|
||||||
<div class="nav-dot" style="background:var(--cyan)"></div>Full Sync Overview
|
|
||||||
</div>
|
|
||||||
<div class="nav-item" data-view="issues" onclick="switchView('issues')">
|
|
||||||
<div class="nav-dot" style="background:var(--green)"></div>Issues
|
|
||||||
</div>
|
|
||||||
<div class="nav-item" data-view="mrs" onclick="switchView('mrs')">
|
|
||||||
<div class="nav-dot" style="background:var(--purple)"></div>Merge Requests
|
|
||||||
</div>
|
|
||||||
<div class="nav-item" data-view="docs" onclick="switchView('docs')">
|
|
||||||
<div class="nav-dot" style="background:var(--amber)"></div>Documents
|
|
||||||
</div>
|
|
||||||
<div class="nav-item" data-view="embed" onclick="switchView('embed')">
|
|
||||||
<div class="nav-dot" style="background:var(--pink)"></div>Embeddings
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="main">
|
|
||||||
<div class="header">
|
|
||||||
<h1 id="view-title">Full Sync Overview</h1>
|
|
||||||
<span class="header-badge" id="view-badge">4 stages</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="canvas-wrapper"><div class="canvas">
|
|
||||||
|
|
||||||
<!-- OVERVIEW -->
|
|
||||||
<div class="flow-container active" id="view-overview">
|
|
||||||
<div class="overview-pipeline">
|
|
||||||
<div class="overview-stage" onclick="switchView('issues')">
|
|
||||||
<div class="stage-num" style="color:var(--green)">Stage 1</div>
|
|
||||||
<div class="stage-title">Ingest Issues</div>
|
|
||||||
<div class="stage-desc">Fetch issues + discussions + resource events from GitLab API</div>
|
|
||||||
<div class="stage-detail">Cursor-based incremental sync.<br>Sequential discussion fetch.<br>Queue-based resource events.</div>
|
|
||||||
</div>
|
|
||||||
<div class="overview-arrow">→</div>
|
|
||||||
<div class="overview-stage" onclick="switchView('mrs')">
|
|
||||||
<div class="stage-num" style="color:var(--purple)">Stage 2</div>
|
|
||||||
<div class="stage-title">Ingest MRs</div>
|
|
||||||
<div class="stage-desc">Fetch merge requests + discussions + resource events</div>
|
|
||||||
<div class="stage-detail">Page-based incremental sync.<br>Parallel prefetch discussions.<br>Queue-based resource events.</div>
|
|
||||||
</div>
|
|
||||||
<div class="overview-arrow">→</div>
|
|
||||||
<div class="overview-stage" onclick="switchView('docs')">
|
|
||||||
<div class="stage-num" style="color:var(--amber)">Stage 3</div>
|
|
||||||
<div class="stage-title">Generate Docs</div>
|
|
||||||
<div class="stage-desc">Regenerate searchable documents for changed entities</div>
|
|
||||||
<div class="stage-detail">Driven by <code>dirty_sources</code> table.<br>Triple-hash skip optimization.<br>FTS5 index auto-updated.</div>
|
|
||||||
</div>
|
|
||||||
<div class="overview-arrow">→</div>
|
|
||||||
<div class="overview-stage" onclick="switchView('embed')">
|
|
||||||
<div class="stage-num" style="color:var(--pink)">Stage 4</div>
|
|
||||||
<div class="stage-title">Embed</div>
|
|
||||||
<div class="stage-desc">Generate vector embeddings via Ollama for semantic search</div>
|
|
||||||
<div class="stage-detail">Hash-based change detection.<br>Chunked, batched API calls.<br><b>Non-fatal</b> — graceful if Ollama down.</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="info-box">
|
|
||||||
<div class="info-box-title">Concurrency Model</div>
|
|
||||||
<ul>
|
|
||||||
<li>Stages 1 & 2 process <b>projects concurrently</b> via <code>buffer_unordered(primary_concurrency)</code></li>
|
|
||||||
<li>Each project gets its own <b>SQLite connection</b>; rate limiter is <b>shared</b></li>
|
|
||||||
<li>Discussions: <b>sequential</b> (issues) or <b>batched parallel prefetch</b> (MRs)</li>
|
|
||||||
<li>Resource events use a <b>persistent job queue</b> with atomic claim + exponential backoff</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="info-box">
|
|
||||||
<div class="info-box-title">Sync Flags</div>
|
|
||||||
<ul>
|
|
||||||
<li><code>--full</code> — Resets all cursors & watermarks, forces complete re-fetch</li>
|
|
||||||
<li><code>--no-docs</code> — Skips Stage 3 (document generation)</li>
|
|
||||||
<li><code>--no-embed</code> — Skips Stage 4 (embedding generation)</li>
|
|
||||||
<li><code>--force</code> — Overrides stale single-flight lock</li>
|
|
||||||
<li><code>--project <path></code> — Sync only one project (fuzzy matching)</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="info-box">
|
|
||||||
<div class="info-box-title">Single-Flight Lock</div>
|
|
||||||
<ul>
|
|
||||||
<li>Table-based lock (<code>AppLock</code>) prevents concurrent syncs</li>
|
|
||||||
<li>Heartbeat keeps the lock alive; stale locks auto-detected</li>
|
|
||||||
<li>Use <code>--force</code> to override a stale lock</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- ISSUES -->
|
|
||||||
<div class="flow-container" id="view-issues">
|
|
||||||
<div class="legend">
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--cyan)"></div>API Call</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--purple)"></div>Transform</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--green)"></div>Database</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--amber)"></div>Decision</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--red)"></div>Error Path</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--pink)"></div>Queue</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--cyan-dim);color:var(--cyan)">1</div>
|
|
||||||
<div class="phase-title">Fetch Issues <span class="phase-subtitle">Cursor-Based Incremental Sync</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node api" data-detail="issue-api-call"><div class="node-title">GitLab API Call</div><div class="node-desc">paginate_issues() with<br>updated_after = cursor - rewind</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="issue-cursor-filter"><div class="node-title">Cursor Filter</div><div class="node-desc">updated_at > cursor_ts<br>OR tie_breaker check</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node transform" data-detail="issue-transform"><div class="node-title">transform_issue()</div><div class="node-desc">GitLab API shape →<br>local DB row shape</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="issue-transaction"><div class="node-title">Transaction</div><div class="node-desc">store_payload → upsert →<br>mark_dirty → relink</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="arrow-down">↓</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node db" data-detail="issue-cursor-update"><div class="node-title">Update Cursor</div><div class="node-desc">Every 100 issues + final<br>sync_cursors table</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--green-dim);color:var(--green)">2</div>
|
|
||||||
<div class="phase-title">Discussion Sync <span class="phase-subtitle">Sequential, Watermark-Based</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node db" data-detail="issue-disc-query"><div class="node-title">Query Stale Issues</div><div class="node-desc">updated_at > COALESCE(<br>discussions_synced_for_<br>updated_at, 0)</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node api" data-detail="issue-disc-fetch"><div class="node-title">Paginate Discussions</div><div class="node-desc">Sequential per issue<br>paginate_issue_discussions()</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node transform" data-detail="issue-disc-transform"><div class="node-title">Transform</div><div class="node-desc">transform_discussion()<br>transform_notes()</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="issue-disc-write"><div class="node-title">Write Discussion</div><div class="node-desc">store_payload → upsert<br>DELETE notes → INSERT notes</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-container">
|
|
||||||
<div class="branch-label success">✓ On Success (all pages fetched)</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node db" data-detail="issue-disc-stale"><div class="node-title">Remove Stale</div><div class="node-desc">DELETE discussions not<br>seen in this fetch</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="issue-disc-watermark"><div class="node-title">Advance Watermark</div><div class="node-desc">discussions_synced_for_<br>updated_at = updated_at</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label error">✗ On Pagination Error</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="issue-disc-fail"><div class="node-title">Skip Stale Removal</div><div class="node-desc">Watermark NOT advanced<br>Will retry next sync</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:rgba(247,120,186,0.15);color:var(--pink)">3</div>
|
|
||||||
<div class="phase-title">Resource Events <span class="phase-subtitle">Queue-Based, Concurrent Fetch</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node queue" data-detail="re-cleanup"><div class="node-title">Cleanup Obsolete</div><div class="node-desc">DELETE jobs where entity<br>watermark is current</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node queue" data-detail="re-enqueue"><div class="node-title">Enqueue Jobs</div><div class="node-desc">INSERT for entities where<br>updated_at > watermark</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node queue" data-detail="re-claim"><div class="node-title">Claim Jobs</div><div class="node-desc">Atomic UPDATE...RETURNING<br>with lock acquisition</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node api" data-detail="re-fetch"><div class="node-title">Fetch Events</div><div class="node-desc">3 concurrent: state +<br>label + milestone</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-container">
|
|
||||||
<div class="branch-label success">✓ On Success</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node db" data-detail="re-store"><div class="node-title">Store Events</div><div class="node-desc">Transaction: upsert all<br>3 event types</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="re-complete"><div class="node-title">Complete + Watermark</div><div class="node-desc">DELETE job row<br>Advance watermark</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label error">✗ Permanent Error (404 / 403)</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="re-permanent"><div class="node-title">Skip Permanently</div><div class="node-desc">complete_job + advance<br>watermark (coalesced)</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label retry">↻ Transient Error</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="re-transient"><div class="node-title">Backoff Retry</div><div class="node-desc">fail_job: 30s x 2^(n-1)<br>capped at 480s</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- MERGE REQUESTS -->
|
|
||||||
<div class="flow-container" id="view-mrs">
|
|
||||||
<div class="legend">
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--cyan)"></div>API Call</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--purple)"></div>Transform</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--green)"></div>Database</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--amber)"></div>Diff from Issues</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--red)"></div>Error Path</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--pink)"></div>Queue</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--cyan-dim);color:var(--cyan)">1</div>
|
|
||||||
<div class="phase-title">Fetch MRs <span class="phase-subtitle">Page-Based Incremental Sync</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node api" data-detail="mr-api-call"><div class="node-title">GitLab API Call</div><div class="node-desc">fetch_merge_requests_page()<br>with cursor rewind</div><div class="diff-badge changed">Page-based, not streaming</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="mr-cursor-filter"><div class="node-title">Cursor Filter</div><div class="node-desc">Same logic as issues:<br>timestamp + tie-breaker</div><div class="diff-badge same">Same as issues</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node transform" data-detail="mr-transform"><div class="node-title">transform_merge_request()</div><div class="node-desc">Maps API shape →<br>local DB row</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="mr-transaction"><div class="node-title">Transaction</div><div class="node-desc">store → upsert → dirty →<br>labels + assignees + reviewers</div><div class="diff-badge changed">3 junction tables (not 2)</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="arrow-down">↓</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node db" data-detail="mr-cursor-update"><div class="node-title">Update Cursor</div><div class="node-desc">Per page (not every 100)</div><div class="diff-badge changed">Per page boundary</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--green-dim);color:var(--green)">2</div>
|
|
||||||
<div class="phase-title">MR Discussion Sync <span class="phase-subtitle">Parallel Prefetch + Serial Write</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="info-box" style="margin-left:40px;margin-bottom:16px;">
|
|
||||||
<div class="info-box-title">Key Differences from Issue Discussions</div>
|
|
||||||
<ul>
|
|
||||||
<li><b>Parallel prefetch</b> — fetches all discussions for a batch concurrently via <code>join_all()</code></li>
|
|
||||||
<li><b>Upsert pattern</b> — notes use INSERT...ON CONFLICT (not delete-all + re-insert)</li>
|
|
||||||
<li><b>Sweep stale</b> — uses <code>last_seen_at</code> timestamp comparison (not set difference)</li>
|
|
||||||
<li><b>Sync health tracking</b> — records <code>discussions_sync_attempts</code> and <code>last_error</code></li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node db" data-detail="mr-disc-query"><div class="node-title">Query Stale MRs</div><div class="node-desc">updated_at > COALESCE(<br>discussions_synced_for_<br>updated_at, 0)</div><div class="diff-badge same">Same watermark logic</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="mr-disc-batch"><div class="node-title">Batch by Concurrency</div><div class="node-desc">dependent_concurrency<br>MRs per batch</div><div class="diff-badge changed">Batched processing</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="arrow-down">↓</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node api" data-detail="mr-disc-prefetch"><div class="node-title">Parallel Prefetch</div><div class="node-desc">join_all() fetches all<br>discussions for batch</div><div class="diff-badge changed">Parallel (not sequential)</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node transform" data-detail="mr-disc-transform"><div class="node-title">Transform In-Memory</div><div class="node-desc">transform_mr_discussion()<br>+ diff position notes</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="mr-disc-write"><div class="node-title">Serial Write</div><div class="node-desc">upsert discussion<br>upsert notes (ON CONFLICT)</div><div class="diff-badge changed">Upsert, not delete+insert</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-container">
|
|
||||||
<div class="branch-label success">✓ On Full Success</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node db" data-detail="mr-disc-sweep"><div class="node-title">Sweep Stale</div><div class="node-desc">DELETE WHERE last_seen_at<br>< run_seen_at (disc + notes)</div><div class="diff-badge changed">last_seen_at sweep</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="mr-disc-watermark"><div class="node-title">Advance Watermark</div><div class="node-desc">discussions_synced_for_<br>updated_at = updated_at</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label error">✗ On Failure</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="mr-disc-fail"><div class="node-title">Record Sync Health</div><div class="node-desc">Watermark NOT advanced<br>Tracks attempts + last_error</div><div class="diff-badge changed">Health tracking</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:rgba(247,120,186,0.15);color:var(--pink)">3</div>
|
|
||||||
<div class="phase-title">Resource Events <span class="phase-subtitle">Same as Issues</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="info-box" style="margin-left:40px">
|
|
||||||
<div class="info-box-title">Identical to Issue Resource Events</div>
|
|
||||||
<ul>
|
|
||||||
<li>Same queue-based approach: cleanup → enqueue → claim → fetch → store/fail</li>
|
|
||||||
<li>Same watermark column: <code>resource_events_synced_for_updated_at</code></li>
|
|
||||||
<li>Same error handling: 404/403 coalesced to empty, transient errors get backoff</li>
|
|
||||||
<li>entity_type = <code>"merge_request"</code> instead of <code>"issue"</code></li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- DOCUMENTS -->
|
|
||||||
<div class="flow-container" id="view-docs">
|
|
||||||
<div class="legend">
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--cyan)"></div>Trigger</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--purple)"></div>Extract</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--green)"></div>Database</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--amber)"></div>Decision</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--red)"></div>Error</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--cyan-dim);color:var(--cyan)">1</div>
|
|
||||||
<div class="phase-title">Dirty Source Queue <span class="phase-subtitle">Populated During Ingestion</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node api" data-detail="doc-trigger"><div class="node-title">mark_dirty_tx()</div><div class="node-desc">Called during every issue/<br>MR/discussion upsert</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="doc-dirty-table"><div class="node-title">dirty_sources Table</div><div class="node-desc">INSERT (source_type, source_id)<br>ON CONFLICT reset backoff</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--amber-dim);color:var(--amber)">2</div>
|
|
||||||
<div class="phase-title">Drain Loop <span class="phase-subtitle">Batch 500, Respects Backoff</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node db" data-detail="doc-drain"><div class="node-title">Get Dirty Sources</div><div class="node-desc">Batch 500, ORDER BY<br>attempt_count, queued_at</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="doc-dispatch"><div class="node-title">Dispatch by Type</div><div class="node-desc">issue / mr / discussion<br>→ extract function</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="doc-deleted-check"><div class="node-title">Source Exists?</div><div class="node-desc">If deleted: remove doc row<br>(cascade cleans FTS + embeds)</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="arrow-down">↓</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node transform" data-detail="doc-extract"><div class="node-title">Extract Content</div><div class="node-desc">Structured text:<br>header + metadata + body</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="doc-triple-hash"><div class="node-title">Triple-Hash Check</div><div class="node-desc">content_hash + labels_hash<br>+ paths_hash all match?</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="doc-write"><div class="node-title">SAVEPOINT Write</div><div class="node-desc">Atomic: document row +<br>labels + paths</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-container">
|
|
||||||
<div class="branch-label success">✓ On Success</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node db" data-detail="doc-clear"><div class="node-title">clear_dirty()</div><div class="node-desc">Remove from dirty_sources</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label error">✗ On Error</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="doc-error"><div class="node-title">record_dirty_error()</div><div class="node-desc">Increment attempt_count<br>Exponential backoff</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label" style="color:var(--purple)">≡ Triple-Hash Match (skip)</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node db" data-detail="doc-skip"><div class="node-title">Skip Write</div><div class="node-desc">All 3 hashes match →<br>no WAL churn, clear dirty</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="info-box">
|
|
||||||
<div class="info-box-title">Full Mode (<code>--full</code>)</div>
|
|
||||||
<ul>
|
|
||||||
<li>Seeds <b>ALL</b> entities into <code>dirty_sources</code> via keyset pagination</li>
|
|
||||||
<li>Triple-hash optimization prevents redundant writes even in full mode</li>
|
|
||||||
<li>Runs FTS <code>OPTIMIZE</code> after drain completes</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- EMBEDDINGS -->
|
|
||||||
<div class="flow-container" id="view-embed">
|
|
||||||
<div class="legend">
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--cyan)"></div>API (Ollama)</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--purple)"></div>Processing</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--green)"></div>Database</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--amber)"></div>Decision</div>
|
|
||||||
<div class="legend-item"><div class="legend-color" style="background:var(--red)"></div>Error</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--amber-dim);color:var(--amber)">1</div>
|
|
||||||
<div class="phase-title">Change Detection <span class="phase-subtitle">Hash + Config Drift</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node decision" data-detail="embed-detect"><div class="node-title">find_pending_documents()</div><div class="node-desc">No metadata row? OR<br>document_hash mismatch? OR<br>config drift?</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="embed-paginate"><div class="node-title">Keyset Pagination</div><div class="node-desc">500 documents per page<br>ordered by doc ID</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--purple-dim);color:var(--purple)">2</div>
|
|
||||||
<div class="phase-title">Chunking <span class="phase-subtitle">Split + Overflow Guard</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node transform" data-detail="embed-chunk"><div class="node-title">split_into_chunks()</div><div class="node-desc">Split by paragraph boundaries<br>with configurable overlap</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node decision" data-detail="embed-overflow"><div class="node-title">Overflow Guard</div><div class="node-desc">Too many chunks?<br>Skip to prevent rowid collision</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node transform" data-detail="embed-work"><div class="node-title">Build ChunkWork</div><div class="node-desc">Assign encoded chunk IDs<br>per document</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="phase">
|
|
||||||
<div class="phase-header">
|
|
||||||
<div class="phase-number" style="background:var(--cyan-dim);color:var(--cyan)">3</div>
|
|
||||||
<div class="phase-title">Ollama Embedding <span class="phase-subtitle">Batched API Calls</span></div>
|
|
||||||
</div>
|
|
||||||
<div class="flow-row">
|
|
||||||
<div class="node api" data-detail="embed-batch"><div class="node-title">Batch Embed</div><div class="node-desc">32 chunks per Ollama<br>API call</div></div>
|
|
||||||
<div class="arrow">→</div>
|
|
||||||
<div class="node db" data-detail="embed-store"><div class="node-title">Store Vectors</div><div class="node-desc">sqlite-vec embeddings table<br>+ embedding_metadata</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-container">
|
|
||||||
<div class="branch-label success">✓ On Success</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node db" data-detail="embed-success"><div class="node-title">SAVEPOINT Commit</div><div class="node-desc">Atomic per page:<br>clear old + write new</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label retry">↻ Context-Length Error</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="embed-ctx-error"><div class="node-title">Retry Individually</div><div class="node-desc">Re-embed each chunk solo<br>to isolate oversized one</div></div>
|
|
||||||
</div>
|
|
||||||
<div class="branch-label error">✗ Other Error</div>
|
|
||||||
<div class="branch-row">
|
|
||||||
<div class="node error" data-detail="embed-other-error"><div class="node-title">Record Error</div><div class="node-desc">Store in embedding_metadata<br>for retry next run</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="info-box">
|
|
||||||
<div class="info-box-title">Full Mode (<code>--full</code>)</div>
|
|
||||||
<ul>
|
|
||||||
<li>DELETEs all <code>embedding_metadata</code> and <code>embeddings</code> rows first</li>
|
|
||||||
<li>Every document re-processed from scratch</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="info-box">
|
|
||||||
<div class="info-box-title">Non-Fatal in Sync</div>
|
|
||||||
<ul>
|
|
||||||
<li>Stage 4 failures (Ollama down, model missing) are <b>graceful</b></li>
|
|
||||||
<li>Sync completes successfully; embeddings just won't be updated</li>
|
|
||||||
<li>Semantic search degrades to FTS-only mode</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div></div>
|
|
||||||
|
|
||||||
<!-- Watermark Panel -->
|
|
||||||
<div class="watermark-panel">
|
|
||||||
<div class="watermark-toggle" onclick="toggleWatermarks()">
|
|
||||||
<span class="chevron" id="wm-chevron">▲</span>
|
|
||||||
Watermark & Cursor Reference
|
|
||||||
</div>
|
|
||||||
<div class="watermark-content" id="wm-content">
|
|
||||||
<table class="wm-table">
|
|
||||||
<thead><tr><th>Table</th><th>Column(s)</th><th>Purpose</th></tr></thead>
|
|
||||||
<tbody>
|
|
||||||
<tr><td>sync_cursors</td><td>updated_at_cursor + tie_breaker_id</td><td>Incremental fetch: "last entity we saw" per project+type</td></tr>
|
|
||||||
<tr><td>issues</td><td>discussions_synced_for_updated_at</td><td>Per-issue discussion watermark</td></tr>
|
|
||||||
<tr><td>issues</td><td>resource_events_synced_for_updated_at</td><td>Per-issue resource event watermark</td></tr>
|
|
||||||
<tr><td>merge_requests</td><td>discussions_synced_for_updated_at</td><td>Per-MR discussion watermark</td></tr>
|
|
||||||
<tr><td>merge_requests</td><td>resource_events_synced_for_updated_at</td><td>Per-MR resource event watermark</td></tr>
|
|
||||||
<tr><td>dirty_sources</td><td>queued_at + next_attempt_at</td><td>Document regeneration queue with backoff</td></tr>
|
|
||||||
<tr><td>embedding_metadata</td><td>document_hash + chunk_max_bytes + model + dims</td><td>Embedding staleness detection</td></tr>
|
|
||||||
<tr><td>pending_dependent_fetches</td><td>locked_at + next_retry_at + attempts</td><td>Resource event job queue with backoff</td></tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Detail Panel -->
|
|
||||||
<div class="detail-panel" id="detail-panel">
|
|
||||||
<div class="detail-header">
|
|
||||||
<h2 id="detail-title">Node Details</h2>
|
|
||||||
<button class="detail-close" onclick="closeDetail()">×</button>
|
|
||||||
</div>
|
|
||||||
<div class="detail-body" id="detail-body"></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
// Human-readable heading shown in the top bar for each flow view.
const viewTitles = {
    overview: 'Full Sync Overview',
    issues: 'Issue Ingestion Flow',
    mrs: 'Merge Request Ingestion Flow',
    docs: 'Document Generation Flow',
    embed: 'Embedding Generation Flow',
};
|
|
||||||
// Small count badge rendered next to the view title (stages/phases per view).
const viewBadges = {
    overview: '4 stages',
    issues: '3 phases',
    mrs: '3 phases',
    docs: '2 phases',
    embed: '3 phases',
};
|
|
||||||
|
|
||||||
// Switch the visible flow diagram to `view` (one of: overview, issues, mrs,
// docs, embed). Updates the container visibility, the nav highlight, the
// header title/badge, and closes any open detail panel.
//
// Defensive fix: resolve the target container BEFORE deactivating anything.
// Previously an unknown `view` deactivated every container and then threw a
// TypeError on `null.classList`, leaving the UI with no view shown at all.
function switchView(view) {
    var target = document.getElementById('view-' + view);
    if (!target) { return; } // unknown view id: keep current view intact
    document.querySelectorAll('.flow-container').forEach(function(el) { el.classList.remove('active'); });
    target.classList.add('active');
    document.querySelectorAll('.nav-item').forEach(function(el) {
        el.classList.toggle('active', el.dataset.view === view);
    });
    // Fall back to the raw view key so we never render the string "undefined".
    document.getElementById('view-title').textContent = viewTitles[view] || view;
    document.getElementById('view-badge').textContent = viewBadges[view] || '';
    closeDetail();
}
|
|
||||||
|
|
||||||
// Expand/collapse the watermark reference panel and flip its chevron icon.
// Both elements toggle the same 'open' class so CSS keeps them in sync.
function toggleWatermarks() {
    var panel = document.getElementById('wm-content');
    var chevron = document.getElementById('wm-chevron');
    panel.classList.toggle('open');
    chevron.classList.toggle('open');
}
|
|
||||||
|
|
||||||
var details = {
|
|
||||||
'issue-api-call': { title: 'GitLab API: Paginate Issues', type: 'api', file: 'src/ingestion/issues.rs:51-140', desc: 'Streams issues from the GitLab API using cursor-based incremental sync. The API is called with updated_after set to the last known cursor minus a configurable rewind window (to handle clock skew between GitLab and the local database).', sql: 'GET /api/v4/projects/{id}/issues\n ?updated_after={cursor - rewind_seconds}\n &order_by=updated_at&sort=asc\n &per_page=100' },
|
|
||||||
'issue-cursor-filter': { title: 'Cursor Filter (Dedup)', type: 'decision', file: 'src/ingestion/issues.rs:95-110', desc: 'Because of the cursor rewind, some issues will be re-fetched that we already have. The cursor filter skips these using a two-part comparison: primary on updated_at timestamp, with gitlab_id as a tie-breaker when timestamps are equal.', sql: '// Pseudocode:\nif issue.updated_at > cursor_ts:\n ACCEPT // newer than cursor\nelif issue.updated_at == cursor_ts\n AND issue.gitlab_id > tie_breaker_id:\n ACCEPT // same timestamp, higher ID\nelse:\n SKIP // already processed' },
|
|
||||||
'issue-transform': { title: 'Transform Issue', type: 'transform', file: 'src/gitlab/transformers/issue.rs', desc: 'Maps the GitLab API response shape to the local database row shape. Parses ISO 8601 timestamps to milliseconds-since-epoch, extracts label names, assignee usernames, milestone info, and due dates.' },
|
|
||||||
'issue-transaction': { title: 'Issue Write Transaction', type: 'db', file: 'src/ingestion/issues.rs:190-220', desc: 'All operations for a single issue are wrapped in one SQLite transaction for atomicity. If any step fails, the entire issue write is rolled back.', sql: 'BEGIN;\n-- 1. Store raw JSON payload (compressed, deduped)\nINSERT INTO payloads ...;\n-- 2. Upsert issue row\nINSERT INTO issues ... ON CONFLICT(gitlab_id)\n DO UPDATE SET ...;\n-- 3. Mark dirty for document regen\nINSERT INTO dirty_sources ...;\n-- 4. Relink labels\nDELETE FROM issue_labels WHERE issue_id = ?;\nINSERT INTO labels ... ON CONFLICT DO UPDATE;\nINSERT INTO issue_labels ...;\n-- 5. Relink assignees\nDELETE FROM issue_assignees WHERE issue_id = ?;\nINSERT INTO issue_assignees ...;\nCOMMIT;' },
|
|
||||||
'issue-cursor-update': { title: 'Update Sync Cursor', type: 'db', file: 'src/ingestion/issues.rs:130-140', desc: 'The sync cursor is updated every 100 issues (for crash recovery) and once at the end of the stream. If the process crashes mid-sync, it resumes from at most 100 issues back.', sql: 'INSERT INTO sync_cursors\n (project_id, resource_type,\n updated_at_cursor, tie_breaker_id)\nVALUES (?1, \'issues\', ?2, ?3)\nON CONFLICT(project_id, resource_type)\n DO UPDATE SET\n updated_at_cursor = ?2,\n tie_breaker_id = ?3;' },
|
|
||||||
'issue-disc-query': { title: 'Query Issues Needing Discussion Sync', type: 'db', file: 'src/ingestion/issues.rs:450-471', desc: 'Finds all issues in this project whose updated_at timestamp exceeds their per-row discussion watermark. Issues that have not changed since their last discussion sync are skipped entirely.', sql: 'SELECT id, iid, updated_at\nFROM issues\nWHERE project_id = ?1\n AND updated_at > COALESCE(\n discussions_synced_for_updated_at, 0\n );' },
|
|
||||||
'issue-disc-fetch': { title: 'Paginate Issue Discussions', type: 'api', file: 'src/ingestion/discussions.rs:73-205', desc: 'Discussions are fetched sequentially per issue (rusqlite Connection is not Send, so async parallelism is not possible here). Each issue\'s discussions are streamed page by page from the GitLab API.', sql: 'GET /api/v4/projects/{id}/issues/{iid}\n /discussions?per_page=100' },
|
|
||||||
'issue-disc-transform': { title: 'Transform Discussion + Notes', type: 'transform', file: 'src/gitlab/transformers/discussion.rs', desc: 'Transforms the raw GitLab discussion payload into normalized rows. Sets NoteableRef::Issue. Computes resolvable/resolved status, first_note_at/last_note_at timestamps, and per-note position indices.' },
|
|
||||||
'issue-disc-write': { title: 'Write Discussion (Full Refresh)', type: 'db', file: 'src/ingestion/discussions.rs:140-180', desc: 'Issue discussions use a full-refresh pattern: all existing notes for a discussion are deleted and re-inserted. This is simpler than upsert but means partial failures lose the previous state.', sql: 'BEGIN;\nINSERT INTO payloads ...;\nINSERT INTO discussions ... ON CONFLICT DO UPDATE;\nINSERT INTO dirty_sources ...;\n-- Full refresh: delete all then re-insert\nDELETE FROM notes WHERE discussion_id = ?;\nINSERT INTO notes VALUES (...);\nCOMMIT;' },
|
|
||||||
'issue-disc-stale': { title: 'Remove Stale Discussions', type: 'db', file: 'src/ingestion/discussions.rs:185-195', desc: 'After successfully fetching ALL discussion pages for an issue, any discussions in the DB that were not seen in this fetch are deleted. Uses a temp table for >500 IDs to avoid SQLite\'s 999-variable limit.', sql: '-- For small sets (<= 500):\nDELETE FROM discussions\nWHERE issue_id = ?\n AND gitlab_id NOT IN (...);\n\n-- For large sets (> 500):\nCREATE TEMP TABLE seen_ids(id TEXT);\nINSERT INTO seen_ids ...;\nDELETE FROM discussions\nWHERE issue_id = ?\n AND gitlab_id NOT IN\n (SELECT id FROM seen_ids);\nDROP TABLE seen_ids;' },
|
|
||||||
'issue-disc-watermark': { title: 'Advance Discussion Watermark', type: 'db', file: 'src/ingestion/discussions.rs:198', desc: 'Sets the per-issue watermark to the issue\'s current updated_at, signaling that discussions are now synced for this version of the issue.', sql: 'UPDATE issues\nSET discussions_synced_for_updated_at\n = updated_at\nWHERE id = ?;' },
|
|
||||||
'issue-disc-fail': { title: 'Pagination Error Handling', type: 'error', file: 'src/ingestion/discussions.rs:182', desc: 'If pagination fails mid-stream, stale discussion removal is skipped (we don\'t know the full set) and the watermark is NOT advanced. The issue will be retried on the next sync run.' },
|
|
||||||
're-cleanup': { title: 'Cleanup Obsolete Jobs', type: 'queue', file: 'src/ingestion/orchestrator.rs:490-520', desc: 'Before enqueuing new jobs, delete any existing jobs for entities whose watermark is already current. These are leftover from a previous run.', sql: 'DELETE FROM pending_dependent_fetches\nWHERE project_id = ?\n AND job_type = \'resource_events\'\n AND entity_local_id IN (\n SELECT id FROM issues\n WHERE project_id = ?\n AND updated_at <= COALESCE(\n resource_events_synced_for_updated_at, 0\n )\n );' },
|
|
||||||
're-enqueue': { title: 'Enqueue Resource Event Jobs', type: 'queue', file: 'src/ingestion/orchestrator.rs:525-555', desc: 'For each entity whose updated_at exceeds its resource event watermark, insert a job into the queue. Uses INSERT OR IGNORE for idempotency.', sql: 'INSERT OR IGNORE INTO pending_dependent_fetches\n (project_id, entity_type, entity_iid,\n entity_local_id, job_type, enqueued_at)\nSELECT project_id, \'issue\', iid, id,\n \'resource_events\', ?now\nFROM issues\nWHERE project_id = ?\n AND updated_at > COALESCE(\n resource_events_synced_for_updated_at, 0\n );' },
|
|
||||||
're-claim': { title: 'Claim Jobs (Atomic Lock)', type: 'queue', file: 'src/core/dependent_queue.rs', desc: 'Atomically claims a batch of unlocked jobs whose backoff period has elapsed. Uses UPDATE...RETURNING for lock acquisition in a single statement.', sql: 'UPDATE pending_dependent_fetches\nSET locked_at = ?now\nWHERE rowid IN (\n SELECT rowid\n FROM pending_dependent_fetches\n WHERE project_id = ?\n AND job_type = \'resource_events\'\n AND locked_at IS NULL\n AND (next_retry_at IS NULL\n OR next_retry_at <= ?now)\n ORDER BY enqueued_at ASC\n LIMIT ?batch_size\n)\nRETURNING *;' },
|
|
||||||
're-fetch': { title: 'Fetch 3 Event Types Concurrently', type: 'api', file: 'src/gitlab/client.rs:732-771', desc: 'Uses tokio::join! (not try_join!) to fetch state, label, and milestone events concurrently. Permanent errors (404, 403) are coalesced to empty vecs via coalesce_inaccessible().', sql: 'tokio::join!(\n fetch_issue_state_events(proj, iid),\n fetch_issue_label_events(proj, iid),\n fetch_issue_milestone_events(proj, iid),\n)\n// Each: coalesce_inaccessible()\n// 404/403 -> Ok(vec![])\n// Other errors -> propagated' },
|
|
||||||
're-store': { title: 'Store Resource Events', type: 'db', file: 'src/ingestion/orchestrator.rs:620-640', desc: 'All three event types are upserted in a single transaction.', sql: 'BEGIN;\nINSERT INTO resource_state_events ...\n ON CONFLICT DO UPDATE;\nINSERT INTO resource_label_events ...\n ON CONFLICT DO UPDATE;\nINSERT INTO resource_milestone_events ...\n ON CONFLICT DO UPDATE;\nCOMMIT;' },
|
|
||||||
're-complete': { title: 'Complete Job + Advance Watermark', type: 'db', file: 'src/ingestion/orchestrator.rs:645-660', desc: 'After successful storage, the job row is deleted and the entity\'s watermark is advanced.', sql: 'DELETE FROM pending_dependent_fetches\n WHERE rowid = ?;\n\nUPDATE issues\nSET resource_events_synced_for_updated_at\n = updated_at\nWHERE id = ?;' },
|
|
||||||
're-permanent': { title: 'Permanent Error: Skip Entity', type: 'error', file: 'src/ingestion/orchestrator.rs:665-680', desc: '404 (endpoint doesn\'t exist) and 403 (insufficient permissions) are permanent. The job is completed and watermark advanced, so this entity is permanently skipped until next updated on GitLab.' },
|
|
||||||
're-transient': { title: 'Transient Error: Exponential Backoff', type: 'error', file: 'src/core/dependent_queue.rs', desc: 'Network errors, 500s, rate limits get exponential backoff. Formula: 30s * 2^(attempts-1), capped at 480s (8 minutes).', sql: 'UPDATE pending_dependent_fetches\nSET locked_at = NULL,\n attempts = attempts + 1,\n next_retry_at = ?now\n + 30000 * pow(2, attempts),\n -- capped at 480000ms (8 min)\n last_error = ?error_msg\nWHERE rowid = ?;' },
|
|
||||||
'mr-api-call': { title: 'GitLab API: Fetch MR Pages', type: 'api', file: 'src/ingestion/merge_requests.rs:51-151', desc: 'Unlike issues which stream, MRs use explicit page-based pagination via fetch_merge_requests_page(). Each page returns items plus a next_page indicator.', sql: 'GET /api/v4/projects/{id}/merge_requests\n ?updated_after={cursor - rewind}\n &order_by=updated_at&sort=asc\n &per_page=100&page={n}' },
|
|
||||||
'mr-cursor-filter': { title: 'Cursor Filter', type: 'decision', file: 'src/ingestion/merge_requests.rs:90-105', desc: 'Identical logic to issues: timestamp comparison with gitlab_id tie-breaker.' },
|
|
||||||
'mr-transform': { title: 'Transform Merge Request', type: 'transform', file: 'src/gitlab/transformers/mr.rs', desc: 'Maps GitLab MR response to local row. Handles draft detection (prefers draft field, falls back to work_in_progress), detailed_merge_status, merge_user resolution, and reviewer extraction.' },
|
|
||||||
'mr-transaction': { title: 'MR Write Transaction', type: 'db', file: 'src/ingestion/merge_requests.rs:170-210', desc: 'Same pattern as issues but with THREE junction tables: labels, assignees, AND reviewers.', sql: 'BEGIN;\nINSERT INTO payloads ...;\nINSERT INTO merge_requests ...\n ON CONFLICT DO UPDATE;\nINSERT INTO dirty_sources ...;\n-- 3 junction tables:\nDELETE FROM mr_labels WHERE mr_id = ?;\nINSERT INTO mr_labels ...;\nDELETE FROM mr_assignees WHERE mr_id = ?;\nINSERT INTO mr_assignees ...;\nDELETE FROM mr_reviewers WHERE mr_id = ?;\nINSERT INTO mr_reviewers ...;\nCOMMIT;' },
|
|
||||||
'mr-cursor-update': { title: 'Update Cursor Per Page', type: 'db', file: 'src/ingestion/merge_requests.rs:140-150', desc: 'Unlike issues (every 100 items), MR cursor is updated at each page boundary for better crash recovery.' },
|
|
||||||
'mr-disc-query': { title: 'Query MRs Needing Discussion Sync', type: 'db', file: 'src/ingestion/merge_requests.rs:430-451', desc: 'Same watermark pattern as issues. Runs AFTER MR ingestion to avoid memory growth.', sql: 'SELECT id, iid, updated_at\nFROM merge_requests\nWHERE project_id = ?1\n AND updated_at > COALESCE(\n discussions_synced_for_updated_at, 0\n );' },
|
|
||||||
'mr-disc-batch': { title: 'Batch by Concurrency', type: 'decision', file: 'src/ingestion/orchestrator.rs:420-465', desc: 'MRs are processed in batches sized by dependent_concurrency. Each batch first prefetches all discussions in parallel, then writes serially.' },
|
|
||||||
'mr-disc-prefetch': { title: 'Parallel Prefetch', type: 'api', file: 'src/ingestion/mr_discussions.rs:66-120', desc: 'All MRs in the batch have their discussions fetched concurrently via join_all(). Each MR\'s discussions are fetched in one call, transformed in memory, and returned as PrefetchedMrDiscussions.', sql: '// For each MR in batch, concurrently:\nGET /api/v4/projects/{id}/merge_requests\n /{iid}/discussions?per_page=100\n\n// All fetched + transformed in memory\n// before any DB writes happen' },
|
|
||||||
'mr-disc-transform': { title: 'Transform MR Discussions', type: 'transform', file: 'src/ingestion/mr_discussions.rs:125-160', desc: 'Uses transform_mr_discussion() which additionally handles DiffNote positions (file paths, line ranges, SHA triplets).' },
|
|
||||||
'mr-disc-write': { title: 'Serial Write (Upsert Pattern)', type: 'db', file: 'src/ingestion/mr_discussions.rs:165-220', desc: 'Unlike issue discussions (delete-all + re-insert), MR discussions use INSERT...ON CONFLICT DO UPDATE for both discussions and notes. Safer for partial failures.', sql: 'BEGIN;\nINSERT INTO payloads ...;\nINSERT INTO discussions ...\n ON CONFLICT DO UPDATE\n SET ..., last_seen_at = ?run_ts;\nINSERT INTO dirty_sources ...;\n-- Upsert notes (not delete+insert):\nINSERT INTO notes ...\n ON CONFLICT DO UPDATE\n SET ..., last_seen_at = ?run_ts;\nCOMMIT;' },
|
|
||||||
'mr-disc-sweep': { title: 'Sweep Stale (last_seen_at)', type: 'db', file: 'src/ingestion/mr_discussions.rs:225-245', desc: 'Staleness detected via last_seen_at timestamps. Both discussions AND notes are swept independently.', sql: '-- Sweep stale discussions:\nDELETE FROM discussions\nWHERE merge_request_id = ?\n AND last_seen_at < ?run_seen_at;\n\n-- Sweep stale notes:\nDELETE FROM notes\nWHERE discussion_id IN (\n SELECT id FROM discussions\n WHERE merge_request_id = ?\n) AND last_seen_at < ?run_seen_at;' },
|
|
||||||
'mr-disc-watermark': { title: 'Advance MR Discussion Watermark', type: 'db', file: 'src/ingestion/mr_discussions.rs:248', desc: 'Same as issues: stamps the per-MR watermark.', sql: 'UPDATE merge_requests\nSET discussions_synced_for_updated_at\n = updated_at\nWHERE id = ?;' },
|
|
||||||
'mr-disc-fail': { title: 'Failure: Sync Health Tracking', type: 'error', file: 'src/ingestion/mr_discussions.rs:252-260', desc: 'Unlike issues, MR discussion failures are tracked: discussions_sync_attempts is incremented and discussions_sync_last_error is recorded. Watermark is NOT advanced.' },
|
|
||||||
'doc-trigger': { title: 'mark_dirty_tx()', type: 'api', file: 'src/ingestion/dirty_tracker.rs', desc: 'Called during every upsert in ingestion. Inserts into dirty_sources, or on conflict resets backoff. This bridges ingestion (stages 1-2) and document generation (stage 3).', sql: 'INSERT INTO dirty_sources\n (source_type, source_id, queued_at)\nVALUES (?1, ?2, ?now)\nON CONFLICT(source_type, source_id)\n DO UPDATE SET\n queued_at = ?now,\n attempt_count = 0,\n next_attempt_at = NULL,\n last_error = NULL;' },
|
|
||||||
'doc-dirty-table': { title: 'dirty_sources Table', type: 'db', file: 'src/ingestion/dirty_tracker.rs', desc: 'Persistent queue of entities needing document regeneration. Supports exponential backoff for failed extractions.' },
|
|
||||||
'doc-drain': { title: 'Get Dirty Sources (Batched)', type: 'db', file: 'src/documents/regenerator.rs:35-45', desc: 'Fetches up to 500 dirty entries per batch, prioritizing fewer attempts. Respects exponential backoff.', sql: 'SELECT source_type, source_id\nFROM dirty_sources\nWHERE next_attempt_at IS NULL\n OR next_attempt_at <= ?now\nORDER BY attempt_count ASC,\n queued_at ASC\nLIMIT 500;' },
|
|
||||||
'doc-dispatch': { title: 'Dispatch by Source Type', type: 'decision', file: 'src/documents/extractor.rs', desc: 'Routes to the appropriate extraction function: "issue" -> extract_issue_document(), "merge_request" -> extract_mr_document(), "discussion" -> extract_discussion_document().' },
|
|
||||||
'doc-deleted-check': { title: 'Source Exists Check', type: 'decision', file: 'src/documents/regenerator.rs:48-55', desc: 'If the source entity was deleted, the extractor returns None. The regenerator deletes the document row. FK cascades clean up FTS and embeddings.' },
|
|
||||||
'doc-extract': { title: 'Extract Structured Content', type: 'transform', file: 'src/documents/extractor.rs', desc: 'Builds searchable text:\n[[Issue]] #42: Title\nProject: group/repo\nURL: ...\nLabels: [bug, urgent]\nState: opened\n\n--- Description ---\n...\n\nDiscussions inherit parent labels and extract DiffNote file paths.' },
|
|
||||||
'doc-triple-hash': { title: 'Triple-Hash Write Optimization', type: 'decision', file: 'src/documents/regenerator.rs:55-62', desc: 'Checks content_hash + labels_hash + paths_hash against existing document. If ALL three match, write is completely skipped. Critical for --full mode performance.' },
|
|
||||||
'doc-write': { title: 'SAVEPOINT Atomic Write', type: 'db', file: 'src/documents/regenerator.rs:58-65', desc: 'Document, labels, and paths written inside a SAVEPOINT for atomicity.', sql: 'SAVEPOINT doc_write;\nINSERT INTO documents ...\n ON CONFLICT DO UPDATE SET\n content = ?, content_hash = ?,\n labels_hash = ?, paths_hash = ?;\nDELETE FROM document_labels\n WHERE doc_id = ?;\nINSERT INTO document_labels ...;\nDELETE FROM document_paths\n WHERE doc_id = ?;\nINSERT INTO document_paths ...;\nRELEASE doc_write;' },
|
|
||||||
'doc-clear': { title: 'Clear Dirty Entry', type: 'db', file: 'src/ingestion/dirty_tracker.rs', desc: 'On success, the dirty_sources row is deleted.', sql: 'DELETE FROM dirty_sources\nWHERE source_type = ?\n AND source_id = ?;' },
|
|
||||||
'doc-error': { title: 'Record Error + Backoff', type: 'error', file: 'src/ingestion/dirty_tracker.rs', desc: 'Increments attempt_count, sets next_attempt_at with exponential backoff. Entry stays for retry.', sql: 'UPDATE dirty_sources\nSET attempt_count = attempt_count + 1,\n next_attempt_at = ?now\n + compute_backoff(attempt_count),\n last_error = ?error_msg\nWHERE source_type = ?\n AND source_id = ?;' },
|
|
||||||
'doc-skip': { title: 'Skip Write (Hash Match)', type: 'db', file: 'src/documents/regenerator.rs:57', desc: 'When all three hashes match, the document has not actually changed. Common when updated_at changes but content/labels/paths remain the same. Dirty entry is cleared without writes.' },
|
|
||||||
'embed-detect': { title: 'Change Detection', type: 'decision', file: 'src/embedding/change_detector.rs', desc: 'Document needs re-embedding if: (1) No embedding_metadata row, (2) document_hash mismatch, (3) Config drift in chunk_max_bytes, model, or dims.', sql: 'SELECT d.id, d.content, d.content_hash\nFROM documents d\nLEFT JOIN embedding_metadata em\n ON em.document_id = d.id\nWHERE em.document_id IS NULL\n OR em.document_hash != d.content_hash\n OR em.chunk_max_bytes != ?config\n OR em.model != ?model\n OR em.dims != ?dims;' },
|
|
||||||
'embed-paginate': { title: 'Keyset Pagination', type: 'db', file: 'src/embedding/pipeline.rs:80-100', desc: '500 documents per page using keyset pagination. Each page wrapped in a SAVEPOINT.' },
|
|
||||||
'embed-chunk': { title: 'Split Into Chunks', type: 'transform', file: 'src/embedding/chunking.rs', desc: 'Splits content at paragraph boundaries with configurable max size and overlap.' },
|
|
||||||
'embed-overflow': { title: 'Overflow Guard', type: 'decision', file: 'src/embedding/pipeline.rs:110-120', desc: 'If a document produces too many chunks, it is skipped to prevent rowid collisions in the encoded chunk ID scheme.' },
|
|
||||||
'embed-work': { title: 'Build ChunkWork Items', type: 'transform', file: 'src/embedding/pipeline.rs:125-140', desc: 'Each chunk gets an encoded ID (document_id * 1000000 + chunk_index) for the sqlite-vec primary key.' },
|
|
||||||
'embed-batch': { title: 'Batch Embed via Ollama', type: 'api', file: 'src/embedding/pipeline.rs:150-200', desc: 'Sends 32 chunks per Ollama API call. Model default: nomic-embed-text.', sql: 'POST http://localhost:11434/api/embed\n{\n "model": "nomic-embed-text",\n "input": ["chunk1...", "chunk2...", ...]\n}' },
|
|
||||||
'embed-store': { title: 'Store Vectors', type: 'db', file: 'src/embedding/pipeline.rs:205-230', desc: 'Vectors stored in sqlite-vec virtual table. Metadata in embedding_metadata. Old embeddings cleared on first successful chunk.', sql: '-- Clear old embeddings:\nDELETE FROM embeddings\n WHERE rowid / 1000000 = ?doc_id;\n\n-- Insert new vector:\nINSERT INTO embeddings(rowid, embedding)\nVALUES (?chunk_id, ?vector_blob);\n\n-- Update metadata:\nINSERT INTO embedding_metadata ...\n ON CONFLICT DO UPDATE SET\n document_hash = ?,\n chunk_max_bytes = ?,\n model = ?, dims = ?;' },
|
|
||||||
'embed-success': { title: 'SAVEPOINT Commit', type: 'db', file: 'src/embedding/pipeline.rs:240-250', desc: 'Each page of 500 documents wrapped in a SAVEPOINT. Completed pages survive crashes.' },
|
|
||||||
'embed-ctx-error': { title: 'Context-Length Retry', type: 'error', file: 'src/embedding/pipeline.rs:260-280', desc: 'If Ollama returns context-length error for a batch, each chunk is retried individually to isolate the oversized one.' },
|
|
||||||
'embed-other-error': { title: 'Record Error for Retry', type: 'error', file: 'src/embedding/pipeline.rs:285-295', desc: 'Network/model errors recorded in embedding_metadata. Document detected as pending again on next run.' },
|
|
||||||
};
|
|
||||||
|
|
||||||
// Escape a string for safe interpolation into HTML markup.
//
// BUG FIX: the previous implementation appended a text node to a detached
// <div> and returned div.textContent — but textContent round-trips the raw
// text, so the input came back completely unescaped. (The classic DOM trick
// reads div.innerHTML, not textContent.) This version escapes the five
// HTML-significant characters directly, with no DOM dependency.
function escapeHtml(str) {
    var entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(str).replace(/[&<>"']/g, function (ch) {
        return entities[ch];
    });
}
|
|
||||||
|
|
||||||
// Build the detail-panel body for one pipeline node: a tag row (type badge
// plus optional file location), a Description section, and — when the node
// carries a SQL/code snippet — a "Key Query / Code" section. Returns a
// DocumentFragment ready to append into #detail-body. All text is inserted
// via textContent, so node data is never interpreted as HTML.
function buildDetailContent(d) {
    var frag = document.createDocumentFragment();

    // Helper: create a .detail-section div already attached to the fragment.
    function section() {
        var el = document.createElement('div');
        el.className = 'detail-section';
        frag.appendChild(el);
        return el;
    }

    // Tag row: node type badge, then the source-file badge when present.
    var tagRow = section();
    var badge = document.createElement('span');
    badge.className = 'detail-tag type-' + d.type;
    badge.textContent = d.type.toUpperCase();
    tagRow.appendChild(badge);
    if (d.file) {
        var fileBadge = document.createElement('span');
        fileBadge.className = 'detail-tag file';
        fileBadge.textContent = d.file;
        tagRow.appendChild(fileBadge);
    }

    // Description section.
    var descSection = section();
    var descHeading = document.createElement('h3');
    descHeading.textContent = 'Description';
    descSection.appendChild(descHeading);
    var descPara = document.createElement('p');
    descPara.textContent = d.desc;
    descSection.appendChild(descPara);

    // Optional SQL / code section.
    if (d.sql) {
        var sqlSection = section();
        var sqlHeading = document.createElement('h3');
        sqlHeading.textContent = 'Key Query / Code';
        sqlSection.appendChild(sqlHeading);
        var sqlBox = document.createElement('div');
        sqlBox.className = 'sql-block';
        sqlBox.textContent = d.sql;
        sqlSection.appendChild(sqlBox);
    }

    return frag;
}
|
|
||||||
|
|
||||||
// Open the detail panel for the node identified by `key`. Looks the node up
// in the `details` map (silently ignoring unknown keys), rebuilds the panel
// body, moves the `.selected` highlight to the clicked node, and slides the
// panel open.
function showDetail(key) {
    var entry = details[key];
    if (!entry) return;

    document.getElementById('detail-title').textContent = entry.title;

    // Replace the panel body wholesale with freshly built content.
    var body = document.getElementById('detail-body');
    while (body.firstChild) {
        body.removeChild(body.firstChild);
    }
    body.appendChild(buildDetailContent(entry));

    // Single-selection: clear any previous highlight, then mark this node.
    document.querySelectorAll('.node.selected').forEach(function (el) {
        el.classList.remove('selected');
    });
    var target = document.querySelector('[data-detail="' + key + '"]');
    if (target) {
        target.classList.add('selected');
    }

    document.getElementById('detail-panel').classList.add('open');
}
|
|
||||||
|
|
||||||
// Close the detail panel and clear any node selection highlight.
function closeDetail() {
    var panel = document.getElementById('detail-panel');
    panel.classList.remove('open');
    var highlighted = document.querySelectorAll('.node.selected');
    highlighted.forEach(function (el) {
        el.classList.remove('selected');
    });
}
|
|
||||||
|
|
||||||
// Global interaction wiring:
//  - clicking a node with a data-detail key opens its detail panel;
//  - clicking anywhere outside both the panel and any node closes it;
//  - Escape closes the panel from anywhere.
document.addEventListener('click', function (evt) {
    var hit = evt.target.closest('.node[data-detail]');
    if (hit) {
        showDetail(hit.dataset.detail);
        return;
    }
    var insidePanel = evt.target.closest('.detail-panel');
    var insideNode = evt.target.closest('.node');
    if (!insidePanel && !insideNode) {
        closeDetail();
    }
});

document.addEventListener('keydown', function (evt) {
    if (evt.key === 'Escape') {
        closeDetail();
    }
});
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
1260
phase-a-review.html
1260
phase-a-review.html
File diff suppressed because it is too large
Load Diff
137
plans/init-refresh-flag.md
Normal file
137
plans/init-refresh-flag.md
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
# Plan: `lore init --refresh`
|
||||||
|
|
||||||
|
**Created:** 2026-03-02
|
||||||
|
**Status:** Complete
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
When new repos are added to the config file, `lore sync` doesn't pick them up because project discovery only happens during `lore init`. Currently, users must use `--force` to overwrite their config, which is awkward.
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
|
||||||
|
Add `--refresh` flag to `lore init` that reads the existing config and updates the database to match, without overwriting the config file.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Plan
|
||||||
|
|
||||||
|
### 1. CLI Changes (`src/cli/mod.rs`)
|
||||||
|
|
||||||
|
Add to init subcommand:
|
||||||
|
- `--refresh` flag (conflicts with `--force`)
|
||||||
|
- Ensure `--robot` / `-J` propagates to init
|
||||||
|
|
||||||
|
### 2. Update `InitOptions` struct
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub struct InitOptions {
|
||||||
|
pub config_path: Option<String>,
|
||||||
|
pub force: bool,
|
||||||
|
pub non_interactive: bool,
|
||||||
|
pub refresh: bool, // NEW
|
||||||
|
pub robot_mode: bool, // NEW
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. New `RefreshResult` struct
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub struct RefreshResult {
|
||||||
|
pub user: UserInfo,
|
||||||
|
pub projects_registered: Vec<ProjectInfo>,
|
||||||
|
pub projects_failed: Vec<ProjectFailure>, // path + error message
|
||||||
|
pub orphans_found: Vec<String>, // paths in DB but not config
|
||||||
|
pub orphans_deleted: Vec<String>, // if user said yes
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ProjectFailure {
|
||||||
|
pub path: String,
|
||||||
|
pub error: String,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Main logic: `run_init_refresh()` (new function)
|
||||||
|
|
||||||
|
```
|
||||||
|
1. Load config via Config::load()
|
||||||
|
2. Resolve token, call get_current_user() → validate auth
|
||||||
|
3. For each project in config.projects:
|
||||||
|
- Call client.get_project(path)
|
||||||
|
- On success: collect for DB upsert
|
||||||
|
- On failure: collect in projects_failed
|
||||||
|
4. Query DB for all existing projects
|
||||||
|
5. Compute orphans = DB projects - config projects
|
||||||
|
6. If orphans exist:
|
||||||
|
- Robot mode: include in output, no prompt, no delete
|
||||||
|
- Interactive: prompt "Delete N orphan projects? [y/N]"
|
||||||
|
- Default N → skip deletion
|
||||||
|
- Y → delete from DB
|
||||||
|
7. Upsert validated projects into DB
|
||||||
|
8. Return RefreshResult
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Improve existing init error message
|
||||||
|
|
||||||
|
In `run_init()`, when config exists and neither `--refresh` nor `--force`:
|
||||||
|
|
||||||
|
**Current:**
|
||||||
|
> Config file exists at ~/.config/lore/config.json. Use --force to overwrite.
|
||||||
|
|
||||||
|
**New:**
|
||||||
|
> Config already exists at ~/.config/lore/config.json.
|
||||||
|
> - Use `--refresh` to register new projects from config
|
||||||
|
> - Use `--force` to overwrite the config file
|
||||||
|
|
||||||
|
### 6. Robot mode output
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ok": true,
|
||||||
|
"data": {
|
||||||
|
"mode": "refresh",
|
||||||
|
"user": { "username": "...", "name": "..." },
|
||||||
|
"projects_registered": [...],
|
||||||
|
"projects_failed": [...],
|
||||||
|
"orphans_found": ["old/project"],
|
||||||
|
"orphans_deleted": []
|
||||||
|
},
|
||||||
|
"meta": { "elapsed_ms": 123 }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Human output
|
||||||
|
|
||||||
|
```
|
||||||
|
✓ Authenticated as @username (Full Name)
|
||||||
|
|
||||||
|
Projects
|
||||||
|
✓ group/project-a registered
|
||||||
|
✓ group/project-b registered
|
||||||
|
✗ group/nonexistent not found
|
||||||
|
|
||||||
|
Orphans
|
||||||
|
• old/removed-project
|
||||||
|
|
||||||
|
Delete 1 orphan project from database? [y/N]: n
|
||||||
|
|
||||||
|
Registered 2 projects (1 failed, 1 orphan kept)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files to Touch
|
||||||
|
|
||||||
|
1. **`src/cli/mod.rs`** — add `--refresh` and `--robot` to init subcommand args
|
||||||
|
2. **`src/cli/commands/init.rs`** — add `RefreshResult`, `run_init_refresh()`, update error message
|
||||||
|
3. **`src/main.rs`** (or CLI dispatch) — route `--refresh` to new function
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
- [x] `lore init --refresh` reads existing config and registers projects
|
||||||
|
- [x] Validates GitLab auth before processing
|
||||||
|
- [x] Orphan projects prompt with default N (interactive mode)
|
||||||
|
- [x] Robot mode outputs JSON, no prompts, includes orphans in output
|
||||||
|
- [x] Existing `lore init` (no flags) suggests `--refresh` when config exists
|
||||||
|
- [x] `--refresh` and `--force` are mutually exclusive
|
||||||
@@ -232,6 +232,7 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
|
|||||||
(
|
(
|
||||||
"init",
|
"init",
|
||||||
&[
|
&[
|
||||||
|
"--refresh",
|
||||||
"--force",
|
"--force",
|
||||||
"--non-interactive",
|
"--non-interactive",
|
||||||
"--gitlab-url",
|
"--gitlab-url",
|
||||||
|
|||||||
@@ -38,6 +38,159 @@ pub struct ProjectInfo {
|
|||||||
pub name: String,
|
pub name: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Refresh types ──

/// Inputs for `lore init --refresh`.
pub struct RefreshOptions {
    /// Override for the config file location; `None` uses the default path.
    pub config_path: Option<String>,
    /// When true, suppress interactive prompts (robot / scripted mode).
    pub non_interactive: bool,
}

/// Outcome of a refresh run. The caller renders this and performs any
/// orphan deletion after prompting the user.
pub struct RefreshResult {
    /// The authenticated GitLab user (auth is validated before project work).
    pub user: UserInfo,
    /// Config projects that were validated against GitLab and upserted into the DB.
    pub projects_registered: Vec<ProjectInfo>,
    /// Config projects that could not be fetched from GitLab (path + error message).
    pub projects_failed: Vec<ProjectFailure>,
    /// Project paths present in the database but absent from the config.
    pub orphans_found: Vec<String>,
    /// Orphans actually deleted. Left empty by `run_init_refresh`; deletion
    /// happens in the caller after a user prompt.
    pub orphans_deleted: Vec<String>,
}

/// A config project that failed GitLab validation during refresh.
pub struct ProjectFailure {
    /// Project path as written in the config (e.g. "group/repo").
    pub path: String,
    /// Human-readable failure reason ("not found" or the raw error text).
    pub error: String,
}
|
||||||
|
|
||||||
|
/// Re-read existing config and register any new projects in the database.
/// Does NOT modify the config file.
///
/// Steps: require an existing config, validate GitLab auth, validate each
/// configured project against the API, detect orphan projects (in DB but not
/// in config), and upsert the validated projects. Orphans are only reported
/// here — deletion is the caller's responsibility (see `delete_orphan_projects`).
///
/// # Errors
/// Returns an error when the config file is missing, the token cannot be
/// resolved, authentication fails, or a database operation fails. Individual
/// project-fetch failures are NOT errors; they are collected in
/// `RefreshResult::projects_failed`.
pub async fn run_init_refresh(options: RefreshOptions) -> Result<RefreshResult> {
    let config_path = get_config_path(options.config_path.as_deref());

    // Refresh only makes sense against an existing config; a missing file
    // means the user should run plain `lore init` instead.
    if !config_path.exists() {
        return Err(LoreError::ConfigNotFound {
            path: config_path.display().to_string(),
        });
    }

    let config = Config::load(options.config_path.as_deref())?;
    let token = config.gitlab.resolve_token()?;
    let client = GitLabClient::new(&config.gitlab.base_url, &token, None);

    // Validate auth up front so we fail fast with a clear message instead of
    // surfacing N per-project auth errors below.
    let gitlab_user = client.get_current_user().await.map_err(|e| {
        if matches!(e, LoreError::GitLabAuthFailed) {
            LoreError::Other(format!(
                "Authentication failed for {}",
                config.gitlab.base_url
            ))
        } else {
            e
        }
    })?;

    let user = UserInfo {
        username: gitlab_user.username,
        name: gitlab_user.name,
    };

    // Validate each configured project against GitLab. Failures are recorded,
    // not fatal — one bad path must not block the rest of the refresh.
    let mut validated_projects: Vec<(ProjectInfo, GitLabProject)> = Vec::new();
    let mut failed_projects: Vec<ProjectFailure> = Vec::new();

    for project_config in &config.projects {
        match client.get_project(&project_config.path).await {
            Ok(project) => {
                validated_projects.push((
                    ProjectInfo {
                        path: project_config.path.clone(),
                        // Fall back to the last path segment when GitLab did
                        // not return a display name.
                        name: project.name.clone().unwrap_or_else(|| {
                            project_config
                                .path
                                .split('/')
                                .next_back()
                                .unwrap_or(&project_config.path)
                                .to_string()
                        }),
                    },
                    project,
                ));
            }
            Err(e) => {
                // Collapse 404s to a short message; keep other errors verbatim.
                let error_msg = if matches!(e, LoreError::GitLabNotFound { .. }) {
                    "not found".to_string()
                } else {
                    e.to_string()
                };
                failed_projects.push(ProjectFailure {
                    path: project_config.path.clone(),
                    error: error_msg,
                });
            }
        }
    }

    // Open database (migrations are idempotent).
    let data_dir = get_data_dir();
    let db_path = data_dir.join("lore.db");
    let conn = create_connection(&db_path)?;
    run_migrations(&conn)?;

    // Find orphans: projects in DB but not in config.
    let config_paths: std::collections::HashSet<&str> =
        config.projects.iter().map(|p| p.path.as_str()).collect();

    let mut stmt = conn.prepare("SELECT path_with_namespace FROM projects")?;
    let db_projects: Vec<String> = stmt
        .query_map([], |row| row.get(0))?
        .filter_map(|r| r.ok())
        .collect();

    let orphans: Vec<String> = db_projects
        .into_iter()
        .filter(|p| !config_paths.contains(p.as_str()))
        .collect();

    // Upsert validated projects, keyed on the stable GitLab project id so
    // renamed paths update in place rather than duplicating rows.
    for (_, gitlab_project) in &validated_projects {
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, default_branch, web_url)
             VALUES (?, ?, ?, ?)
             ON CONFLICT(gitlab_project_id) DO UPDATE SET
                 path_with_namespace = excluded.path_with_namespace,
                 default_branch = excluded.default_branch,
                 web_url = excluded.web_url",
            (
                gitlab_project.id,
                &gitlab_project.path_with_namespace,
                &gitlab_project.default_branch,
                &gitlab_project.web_url,
            ),
        )?;
    }

    Ok(RefreshResult {
        user,
        projects_registered: validated_projects.into_iter().map(|(p, _)| p).collect(),
        projects_failed: failed_projects,
        orphans_found: orphans,
        orphans_deleted: Vec::new(), // Caller handles deletion after user prompt
    })
}
|
||||||
|
|
||||||
|
/// Delete orphan projects from the database.
|
||||||
|
pub fn delete_orphan_projects(config_path: Option<&str>, orphans: &[String]) -> Result<usize> {
|
||||||
|
let data_dir = get_data_dir();
|
||||||
|
let db_path = data_dir.join("lore.db");
|
||||||
|
let conn = create_connection(&db_path)?;
|
||||||
|
|
||||||
|
let _ = config_path; // Reserved for future use
|
||||||
|
|
||||||
|
let mut deleted = 0;
|
||||||
|
for path in orphans {
|
||||||
|
let rows = conn.execute("DELETE FROM projects WHERE path_with_namespace = ?", [path])?;
|
||||||
|
deleted += rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(deleted)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitResult> {
|
pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result<InitResult> {
|
||||||
let config_path = get_config_path(options.config_path.as_deref());
|
let config_path = get_config_path(options.config_path.as_deref());
|
||||||
let data_dir = get_data_dir();
|
let data_dir = get_data_dir();
|
||||||
|
|||||||
@@ -42,7 +42,10 @@ pub use ingest::{
|
|||||||
DryRunPreview, IngestDisplay, print_dry_run_preview, print_dry_run_preview_json,
|
DryRunPreview, IngestDisplay, print_dry_run_preview, print_dry_run_preview_json,
|
||||||
print_ingest_summary, print_ingest_summary_json, run_ingest, run_ingest_dry_run,
|
print_ingest_summary, print_ingest_summary_json, run_ingest, run_ingest_dry_run,
|
||||||
};
|
};
|
||||||
pub use init::{InitInputs, InitOptions, InitResult, run_init, run_token_set, run_token_show};
|
pub use init::{
|
||||||
|
InitInputs, InitOptions, InitResult, RefreshOptions, RefreshResult, delete_orphan_projects,
|
||||||
|
run_init, run_init_refresh, run_token_set, run_token_show,
|
||||||
|
};
|
||||||
pub use list::{
|
pub use list::{
|
||||||
ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser,
|
ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser,
|
||||||
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
|
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
|
||||||
|
|||||||
@@ -13,7 +13,9 @@ pub(super) fn query_active(
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
include_closed: bool,
|
include_closed: bool,
|
||||||
) -> Result<ActiveResult> {
|
) -> Result<ActiveResult> {
|
||||||
let limit_plus_one = (limit + 1) as i64;
|
// Prevent overflow: saturating_add caps at usize::MAX instead of wrapping to 0.
|
||||||
|
// The .min() ensures the value fits in i64 for SQLite's LIMIT clause.
|
||||||
|
let limit_plus_one = limit.saturating_add(1).min(i64::MAX as usize) as i64;
|
||||||
|
|
||||||
// State filter for open-entities-only (default behavior)
|
// State filter for open-entities-only (default behavior)
|
||||||
let state_joins = if include_closed {
|
let state_joins = if include_closed {
|
||||||
|
|||||||
@@ -16,7 +16,9 @@ pub(super) fn query_workload(
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
include_closed: bool,
|
include_closed: bool,
|
||||||
) -> Result<WorkloadResult> {
|
) -> Result<WorkloadResult> {
|
||||||
let limit_plus_one = (limit + 1) as i64;
|
// Prevent overflow: saturating_add caps at usize::MAX instead of wrapping to 0.
|
||||||
|
// The .min() ensures the value fits in i64 for SQLite's LIMIT clause.
|
||||||
|
let limit_plus_one = limit.saturating_add(1).min(i64::MAX as usize) as i64;
|
||||||
|
|
||||||
// Query 1: Open issues assigned to user
|
// Query 1: Open issues assigned to user
|
||||||
let issues_sql = "SELECT i.iid,
|
let issues_sql = "SELECT i.iid,
|
||||||
|
|||||||
@@ -3394,3 +3394,38 @@ fn active_excludes_closed_entity_discussions() {
|
|||||||
assert_eq!(result.discussions.len(), 2);
|
assert_eq!(result.discussions.len(), 2);
|
||||||
assert_eq!(result.total_unresolved_in_window, 2);
|
assert_eq!(result.total_unresolved_in_window, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Regression: Unlimited limit must not overflow ─────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn workload_unlimited_limit_returns_results() {
|
||||||
|
// Regression test for integer overflow bug: when limit=usize::MAX, the
|
||||||
|
// expression (limit + 1) wrapped to 0, causing LIMIT 0 to return no rows.
|
||||||
|
let conn = setup_test_db();
|
||||||
|
insert_project(&conn, 1, "group/repo");
|
||||||
|
insert_issue(&conn, 1, 1, 100, "alice");
|
||||||
|
insert_assignee(&conn, 1, "alice");
|
||||||
|
|
||||||
|
// usize::MAX simulates the "unlimited" default when --limit is omitted
|
||||||
|
let result = query_workload(&conn, "alice", None, None, usize::MAX, false).unwrap();
|
||||||
|
assert!(
|
||||||
|
!result.assigned_issues.is_empty(),
|
||||||
|
"usize::MAX limit should return results, not overflow to LIMIT 0"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn active_unlimited_limit_returns_results() {
|
||||||
|
// Same regression test for query_active
|
||||||
|
let conn = setup_test_db();
|
||||||
|
insert_project(&conn, 1, "group/repo");
|
||||||
|
insert_issue(&conn, 1, 1, 100, "alice");
|
||||||
|
insert_discussion(&conn, 1, 1, None, Some(1), true, false);
|
||||||
|
insert_note(&conn, 1, 1, 1, "alice");
|
||||||
|
|
||||||
|
let result = query_active(&conn, None, 0, usize::MAX, false).unwrap();
|
||||||
|
assert!(
|
||||||
|
!result.discussions.is_empty(),
|
||||||
|
"usize::MAX limit should return results, not overflow to LIMIT 0"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|||||||
@@ -163,10 +163,15 @@ pub enum Commands {
|
|||||||
/// Initialize configuration and database
|
/// Initialize configuration and database
|
||||||
#[command(after_help = "\x1b[1mExamples:\x1b[0m
|
#[command(after_help = "\x1b[1mExamples:\x1b[0m
|
||||||
lore init # Interactive setup
|
lore init # Interactive setup
|
||||||
|
lore init --refresh # Register projects from existing config
|
||||||
lore init --force # Overwrite existing config
|
lore init --force # Overwrite existing config
|
||||||
lore --robot init --gitlab-url https://gitlab.com \\
|
lore --robot init --gitlab-url https://gitlab.com \\
|
||||||
--token-env-var GITLAB_TOKEN --projects group/repo # Non-interactive setup")]
|
--token-env-var GITLAB_TOKEN --projects group/repo # Non-interactive setup")]
|
||||||
Init {
|
Init {
|
||||||
|
/// Re-read config and register any new projects in the database
|
||||||
|
#[arg(long, conflicts_with = "force")]
|
||||||
|
refresh: bool,
|
||||||
|
|
||||||
/// Skip overwrite confirmation
|
/// Skip overwrite confirmation
|
||||||
#[arg(short = 'f', long)]
|
#[arg(short = 'f', long)]
|
||||||
force: bool,
|
force: bool,
|
||||||
|
|||||||
@@ -576,6 +576,23 @@ impl GitLabClient {
|
|||||||
|
|
||||||
Ok(discussions)
|
Ok(discussions)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn fetch_all_issue_discussions(
|
||||||
|
&self,
|
||||||
|
gitlab_project_id: i64,
|
||||||
|
issue_iid: i64,
|
||||||
|
) -> Result<Vec<GitLabDiscussion>> {
|
||||||
|
use futures::StreamExt;
|
||||||
|
|
||||||
|
let mut discussions = Vec::new();
|
||||||
|
let mut stream = self.paginate_issue_discussions(gitlab_project_id, issue_iid);
|
||||||
|
|
||||||
|
while let Some(result) = stream.next().await {
|
||||||
|
discussions.push(result?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(discussions)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GitLabClient {
|
impl GitLabClient {
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
use futures::StreamExt;
|
|
||||||
use rusqlite::{Connection, params};
|
use rusqlite::{Connection, params};
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
@@ -9,8 +8,9 @@ use crate::core::time::now_ms;
|
|||||||
use crate::documents::SourceType;
|
use crate::documents::SourceType;
|
||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
use crate::gitlab::transformers::{
|
use crate::gitlab::transformers::{
|
||||||
NormalizedNote, NoteableRef, transform_discussion, transform_notes,
|
NormalizedDiscussion, NormalizedNote, NoteableRef, transform_discussion, transform_notes,
|
||||||
};
|
};
|
||||||
|
use crate::gitlab::types::GitLabDiscussion;
|
||||||
use crate::ingestion::dirty_tracker;
|
use crate::ingestion::dirty_tracker;
|
||||||
|
|
||||||
use super::issues::IssueForDiscussionSync;
|
use super::issues::IssueForDiscussionSync;
|
||||||
@@ -29,109 +29,113 @@ pub struct IngestDiscussionsResult {
|
|||||||
pub stale_discussions_removed: usize,
|
pub stale_discussions_removed: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn ingest_issue_discussions(
|
// ═══════════════════════════════════════════════════════════════════════
|
||||||
conn: &Connection,
|
// Prefetch pattern — concurrent HTTP fetch, sequential DB write
|
||||||
client: &GitLabClient,
|
// ═══════════════════════════════════════════════════════════════════════
|
||||||
config: &Config,
|
|
||||||
gitlab_project_id: i64,
|
|
||||||
local_project_id: i64,
|
|
||||||
issues: &[IssueForDiscussionSync],
|
|
||||||
) -> Result<IngestDiscussionsResult> {
|
|
||||||
let mut total_result = IngestDiscussionsResult::default();
|
|
||||||
|
|
||||||
for issue in issues {
|
#[derive(Debug)]
|
||||||
let result = ingest_discussions_for_issue(
|
pub struct PrefetchedIssueDiscussions {
|
||||||
conn,
|
pub issue: IssueForDiscussionSync,
|
||||||
client,
|
pub discussions: Vec<PrefetchedDiscussion>,
|
||||||
config,
|
pub fetch_error: Option<String>,
|
||||||
gitlab_project_id,
|
|
||||||
local_project_id,
|
|
||||||
issue,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
total_result.discussions_fetched += result.discussions_fetched;
|
|
||||||
total_result.discussions_upserted += result.discussions_upserted;
|
|
||||||
total_result.notes_upserted += result.notes_upserted;
|
|
||||||
total_result.stale_discussions_removed += result.stale_discussions_removed;
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
issues_processed = issues.len(),
|
|
||||||
discussions_fetched = total_result.discussions_fetched,
|
|
||||||
discussions_upserted = total_result.discussions_upserted,
|
|
||||||
notes_upserted = total_result.notes_upserted,
|
|
||||||
stale_removed = total_result.stale_discussions_removed,
|
|
||||||
"Discussion ingestion complete"
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(total_result)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn ingest_discussions_for_issue(
|
#[derive(Debug)]
|
||||||
conn: &Connection,
|
pub struct PrefetchedDiscussion {
|
||||||
|
pub raw: GitLabDiscussion,
|
||||||
|
pub normalized: NormalizedDiscussion,
|
||||||
|
pub notes: Vec<NormalizedNote>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prefetch all discussions for an issue (HTTP only, no DB writes).
|
||||||
|
/// This function is designed to be called concurrently via `join_all`.
|
||||||
|
pub async fn prefetch_issue_discussions(
|
||||||
client: &GitLabClient,
|
client: &GitLabClient,
|
||||||
config: &Config,
|
|
||||||
gitlab_project_id: i64,
|
gitlab_project_id: i64,
|
||||||
local_project_id: i64,
|
local_project_id: i64,
|
||||||
issue: &IssueForDiscussionSync,
|
issue: IssueForDiscussionSync,
|
||||||
) -> Result<IngestDiscussionsResult> {
|
) -> PrefetchedIssueDiscussions {
|
||||||
let mut result = IngestDiscussionsResult::default();
|
debug!(issue_iid = issue.iid, "Prefetching discussions for issue");
|
||||||
|
|
||||||
debug!(
|
let raw_discussions = match client
|
||||||
issue_iid = issue.iid,
|
.fetch_all_issue_discussions(gitlab_project_id, issue.iid)
|
||||||
local_issue_id = issue.local_issue_id,
|
.await
|
||||||
"Fetching discussions for issue"
|
{
|
||||||
);
|
Ok(d) => d,
|
||||||
|
Err(e) => {
|
||||||
|
return PrefetchedIssueDiscussions {
|
||||||
|
issue,
|
||||||
|
discussions: Vec::new(),
|
||||||
|
fetch_error: Some(e.to_string()),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let mut discussions_stream = client.paginate_issue_discussions(gitlab_project_id, issue.iid);
|
let mut discussions = Vec::with_capacity(raw_discussions.len());
|
||||||
|
|
||||||
let mut seen_discussion_ids: Vec<String> = Vec::new();
|
|
||||||
let mut pagination_error: Option<crate::core::error::LoreError> = None;
|
|
||||||
|
|
||||||
let run_seen_at = now_ms();
|
|
||||||
|
|
||||||
while let Some(disc_result) = discussions_stream.next().await {
|
|
||||||
let gitlab_discussion = match disc_result {
|
|
||||||
Ok(d) => d,
|
|
||||||
Err(e) => {
|
|
||||||
warn!(
|
|
||||||
issue_iid = issue.iid,
|
|
||||||
error = %e,
|
|
||||||
"Error during discussion pagination, skipping stale removal"
|
|
||||||
);
|
|
||||||
pagination_error = Some(e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
result.discussions_fetched += 1;
|
|
||||||
|
|
||||||
let payload_bytes = serde_json::to_vec(&gitlab_discussion)?;
|
|
||||||
|
|
||||||
|
for raw in raw_discussions {
|
||||||
let normalized = transform_discussion(
|
let normalized = transform_discussion(
|
||||||
&gitlab_discussion,
|
&raw,
|
||||||
local_project_id,
|
local_project_id,
|
||||||
NoteableRef::Issue(issue.local_issue_id),
|
NoteableRef::Issue(issue.local_issue_id),
|
||||||
);
|
);
|
||||||
|
let notes = transform_notes(&raw, local_project_id);
|
||||||
|
|
||||||
|
discussions.push(PrefetchedDiscussion {
|
||||||
|
raw,
|
||||||
|
normalized,
|
||||||
|
notes,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
PrefetchedIssueDiscussions {
|
||||||
|
issue,
|
||||||
|
discussions,
|
||||||
|
fetch_error: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write prefetched discussions to the database (sequential DB writes).
|
||||||
|
pub fn write_prefetched_issue_discussions(
|
||||||
|
conn: &Connection,
|
||||||
|
config: &Config,
|
||||||
|
local_project_id: i64,
|
||||||
|
prefetched: PrefetchedIssueDiscussions,
|
||||||
|
) -> Result<IngestDiscussionsResult> {
|
||||||
|
let mut result = IngestDiscussionsResult::default();
|
||||||
|
let issue = &prefetched.issue;
|
||||||
|
|
||||||
|
if let Some(error) = &prefetched.fetch_error {
|
||||||
|
warn!(issue_iid = issue.iid, error = %error, "Prefetch failed for issue");
|
||||||
|
return Ok(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
let run_seen_at = now_ms();
|
||||||
|
let mut seen_discussion_ids: Vec<String> = Vec::with_capacity(prefetched.discussions.len());
|
||||||
|
|
||||||
|
for disc in &prefetched.discussions {
|
||||||
|
result.discussions_fetched += 1;
|
||||||
|
let notes_count = disc.notes.len();
|
||||||
|
|
||||||
let tx = conn.unchecked_transaction()?;
|
let tx = conn.unchecked_transaction()?;
|
||||||
|
|
||||||
|
let payload_bytes = serde_json::to_vec(&disc.raw)?;
|
||||||
let payload_id = store_payload(
|
let payload_id = store_payload(
|
||||||
&tx,
|
&tx,
|
||||||
StorePayloadOptions {
|
StorePayloadOptions {
|
||||||
project_id: Some(local_project_id),
|
project_id: Some(local_project_id),
|
||||||
resource_type: "discussion",
|
resource_type: "discussion",
|
||||||
gitlab_id: &gitlab_discussion.id,
|
gitlab_id: &disc.raw.id,
|
||||||
json_bytes: &payload_bytes,
|
json_bytes: &payload_bytes,
|
||||||
compress: config.storage.compress_raw_payloads,
|
compress: config.storage.compress_raw_payloads,
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
upsert_discussion(&tx, &normalized, payload_id)?;
|
upsert_discussion(&tx, &disc.normalized, payload_id)?;
|
||||||
|
|
||||||
let local_discussion_id: i64 = tx.query_row(
|
let local_discussion_id: i64 = tx.query_row(
|
||||||
"SELECT id FROM discussions WHERE project_id = ? AND gitlab_discussion_id = ?",
|
"SELECT id FROM discussions WHERE project_id = ? AND gitlab_discussion_id = ?",
|
||||||
(local_project_id, &normalized.gitlab_discussion_id),
|
(local_project_id, &disc.normalized.gitlab_discussion_id),
|
||||||
|row| row.get(0),
|
|row| row.get(0),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@@ -147,12 +151,8 @@ async fn ingest_discussions_for_issue(
|
|||||||
params![now_ms(), local_discussion_id],
|
params![now_ms(), local_discussion_id],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let notes = transform_notes(&gitlab_discussion, local_project_id);
|
for note in &disc.notes {
|
||||||
let notes_count = notes.len();
|
let outcome = upsert_note_for_issue(&tx, local_discussion_id, note, run_seen_at, None)?;
|
||||||
|
|
||||||
for note in notes {
|
|
||||||
let outcome =
|
|
||||||
upsert_note_for_issue(&tx, local_discussion_id, ¬e, run_seen_at, None)?;
|
|
||||||
if !note.is_system && outcome.changed_semantics {
|
if !note.is_system && outcome.changed_semantics {
|
||||||
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
|
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
|
||||||
}
|
}
|
||||||
@@ -164,26 +164,22 @@ async fn ingest_discussions_for_issue(
|
|||||||
|
|
||||||
result.discussions_upserted += 1;
|
result.discussions_upserted += 1;
|
||||||
result.notes_upserted += notes_count;
|
result.notes_upserted += notes_count;
|
||||||
seen_discussion_ids.push(normalized.gitlab_discussion_id.clone());
|
seen_discussion_ids.push(disc.normalized.gitlab_discussion_id.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
if pagination_error.is_none() {
|
// Only do stale removal if fetch succeeded
|
||||||
let removed = remove_stale_discussions(conn, issue.local_issue_id, &seen_discussion_ids)?;
|
let removed = remove_stale_discussions(conn, issue.local_issue_id, &seen_discussion_ids)?;
|
||||||
result.stale_discussions_removed = removed;
|
result.stale_discussions_removed = removed;
|
||||||
|
|
||||||
update_issue_sync_timestamp(conn, issue.local_issue_id, issue.updated_at)?;
|
update_issue_sync_timestamp(conn, issue.local_issue_id, issue.updated_at)?;
|
||||||
} else if let Some(err) = pagination_error {
|
|
||||||
warn!(
|
|
||||||
issue_iid = issue.iid,
|
|
||||||
discussions_seen = seen_discussion_ids.len(),
|
|
||||||
"Skipping stale removal due to pagination error"
|
|
||||||
);
|
|
||||||
return Err(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ═══════════════════════════════════════════════════════════════════════
|
||||||
|
// Database helpers
|
||||||
|
// ═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
fn upsert_discussion(
|
fn upsert_discussion(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
discussion: &crate::gitlab::transformers::NormalizedDiscussion,
|
discussion: &crate::gitlab::transformers::NormalizedDiscussion,
|
||||||
|
|||||||
@@ -8,7 +8,9 @@ pub mod mr_discussions;
|
|||||||
pub mod orchestrator;
|
pub mod orchestrator;
|
||||||
pub(crate) mod surgical;
|
pub(crate) mod surgical;
|
||||||
|
|
||||||
pub use discussions::{IngestDiscussionsResult, ingest_issue_discussions};
|
pub use discussions::{
|
||||||
|
IngestDiscussionsResult, prefetch_issue_discussions, write_prefetched_issue_discussions,
|
||||||
|
};
|
||||||
pub use issues::{IngestIssuesResult, IssueForDiscussionSync, ingest_issues};
|
pub use issues::{IngestIssuesResult, IssueForDiscussionSync, ingest_issues};
|
||||||
pub use merge_requests::{
|
pub use merge_requests::{
|
||||||
IngestMergeRequestsResult, MrForDiscussionSync, get_mrs_needing_discussion_sync,
|
IngestMergeRequestsResult, MrForDiscussionSync, get_mrs_needing_discussion_sync,
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ use crate::core::references::{
|
|||||||
use crate::core::shutdown::ShutdownSignal;
|
use crate::core::shutdown::ShutdownSignal;
|
||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
|
|
||||||
use super::discussions::ingest_issue_discussions;
|
use super::discussions::{prefetch_issue_discussions, write_prefetched_issue_discussions};
|
||||||
use super::issues::{IssueForDiscussionSync, ingest_issues};
|
use super::issues::{IssueForDiscussionSync, ingest_issues};
|
||||||
use super::merge_requests::{
|
use super::merge_requests::{
|
||||||
MrForDiscussionSync, get_mrs_needing_discussion_sync, ingest_merge_requests,
|
MrForDiscussionSync, get_mrs_needing_discussion_sync, ingest_merge_requests,
|
||||||
@@ -471,27 +471,30 @@ async fn sync_discussions_sequential(
|
|||||||
let total = issues.len();
|
let total = issues.len();
|
||||||
|
|
||||||
let mut results = Vec::with_capacity(issues.len());
|
let mut results = Vec::with_capacity(issues.len());
|
||||||
|
let mut processed = 0;
|
||||||
|
|
||||||
for chunk in issues.chunks(batch_size) {
|
for chunk in issues.chunks(batch_size) {
|
||||||
if signal.is_cancelled() {
|
if signal.is_cancelled() {
|
||||||
debug!("Shutdown requested during discussion sync, returning partial results");
|
debug!("Shutdown requested during discussion sync, returning partial results");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
for issue in chunk {
|
|
||||||
let disc_result = ingest_issue_discussions(
|
// Concurrent HTTP prefetch for all issues in this batch
|
||||||
conn,
|
let prefetch_futures = chunk.iter().map(|issue| {
|
||||||
client,
|
prefetch_issue_discussions(client, gitlab_project_id, local_project_id, issue.clone())
|
||||||
config,
|
});
|
||||||
gitlab_project_id,
|
let prefetched_batch = join_all(prefetch_futures).await;
|
||||||
local_project_id,
|
|
||||||
std::slice::from_ref(issue),
|
// Sequential DB writes
|
||||||
)
|
for prefetched in prefetched_batch {
|
||||||
.await?;
|
let disc_result =
|
||||||
|
write_prefetched_issue_discussions(conn, config, local_project_id, prefetched)?;
|
||||||
results.push(disc_result);
|
results.push(disc_result);
|
||||||
|
processed += 1;
|
||||||
|
|
||||||
if let Some(cb) = progress {
|
if let Some(cb) = progress {
|
||||||
cb(ProgressEvent::DiscussionSynced {
|
cb(ProgressEvent::DiscussionSynced {
|
||||||
current: results.len(),
|
current: processed,
|
||||||
total,
|
total,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,9 @@ use crate::documents::SourceType;
|
|||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest};
|
use crate::gitlab::types::{GitLabIssue, GitLabMergeRequest};
|
||||||
use crate::ingestion::dirty_tracker;
|
use crate::ingestion::dirty_tracker;
|
||||||
use crate::ingestion::discussions::ingest_issue_discussions;
|
use crate::ingestion::discussions::{
|
||||||
|
prefetch_issue_discussions, write_prefetched_issue_discussions,
|
||||||
|
};
|
||||||
use crate::ingestion::issues::{IssueForDiscussionSync, process_single_issue};
|
use crate::ingestion::issues::{IssueForDiscussionSync, process_single_issue};
|
||||||
use crate::ingestion::merge_requests::{MrForDiscussionSync, process_single_mr};
|
use crate::ingestion::merge_requests::{MrForDiscussionSync, process_single_mr};
|
||||||
use crate::ingestion::mr_diffs::upsert_mr_file_changes;
|
use crate::ingestion::mr_diffs::upsert_mr_file_changes;
|
||||||
@@ -289,16 +291,9 @@ pub(crate) async fn fetch_dependents_for_issue(
|
|||||||
iid,
|
iid,
|
||||||
updated_at: 0, // not used for filtering in surgical mode
|
updated_at: 0, // not used for filtering in surgical mode
|
||||||
};
|
};
|
||||||
match ingest_issue_discussions(
|
let prefetched =
|
||||||
conn,
|
prefetch_issue_discussions(client, gitlab_project_id, project_id, sync_item).await;
|
||||||
client,
|
match write_prefetched_issue_discussions(conn, config, project_id, prefetched) {
|
||||||
config,
|
|
||||||
gitlab_project_id,
|
|
||||||
project_id,
|
|
||||||
&[sync_item],
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(disc_result) => {
|
Ok(disc_result) => {
|
||||||
result.discussions_fetched = disc_result.discussions_fetched;
|
result.discussions_fetched = disc_result.discussions_fetched;
|
||||||
}
|
}
|
||||||
|
|||||||
257
src/main.rs
257
src/main.rs
@@ -10,24 +10,24 @@ use lore::Config;
|
|||||||
use lore::cli::autocorrect::{self, CorrectionResult};
|
use lore::cli::autocorrect::{self, CorrectionResult};
|
||||||
use lore::cli::commands::{
|
use lore::cli::commands::{
|
||||||
IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters,
|
IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters,
|
||||||
NoteListFilters, SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser,
|
NoteListFilters, RefreshOptions, RefreshResult, SearchCliFilters, SyncOptions, TimelineParams,
|
||||||
open_mr_in_browser, parse_trace_path, print_count, print_count_json, print_cron_install,
|
delete_orphan_projects, open_issue_in_browser, open_mr_in_browser, parse_trace_path,
|
||||||
print_cron_install_json, print_cron_status, print_cron_status_json, print_cron_uninstall,
|
print_count, print_count_json, print_cron_install, print_cron_install_json, print_cron_status,
|
||||||
print_cron_uninstall_json, print_doctor_results, print_drift_human, print_drift_json,
|
print_cron_status_json, print_cron_uninstall, print_cron_uninstall_json, print_doctor_results,
|
||||||
print_dry_run_preview, print_dry_run_preview_json, print_embed, print_embed_json,
|
print_drift_human, print_drift_json, print_dry_run_preview, print_dry_run_preview_json,
|
||||||
print_event_count, print_event_count_json, print_file_history, print_file_history_json,
|
print_embed, print_embed_json, print_event_count, print_event_count_json, print_file_history,
|
||||||
print_generate_docs, print_generate_docs_json, print_ingest_summary, print_ingest_summary_json,
|
print_file_history_json, print_generate_docs, print_generate_docs_json, print_ingest_summary,
|
||||||
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
|
print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
|
||||||
print_list_notes, print_list_notes_json, print_related_human, print_related_json,
|
print_list_mrs_json, print_list_notes, print_list_notes_json, print_related_human,
|
||||||
print_search_results, print_search_results_json, print_show_issue, print_show_issue_json,
|
print_related_json, print_search_results, print_search_results_json, print_show_issue,
|
||||||
print_show_mr, print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json,
|
print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, print_stats_json,
|
||||||
print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta,
|
print_sync, print_sync_json, print_sync_status, print_sync_status_json, print_timeline,
|
||||||
print_trace, print_trace_json, print_who_human, print_who_json, query_notes, run_auth_test,
|
print_timeline_json_with_meta, print_trace, print_trace_json, print_who_human, print_who_json,
|
||||||
run_count, run_count_events, run_cron_install, run_cron_status, run_cron_uninstall, run_doctor,
|
query_notes, run_auth_test, run_count, run_count_events, run_cron_install, run_cron_status,
|
||||||
run_drift, run_embed, run_file_history, run_generate_docs, run_ingest, run_ingest_dry_run,
|
run_cron_uninstall, run_doctor, run_drift, run_embed, run_file_history, run_generate_docs,
|
||||||
run_init, run_list_issues, run_list_mrs, run_me, run_related, run_search, run_show_issue,
|
run_ingest, run_ingest_dry_run, run_init, run_init_refresh, run_list_issues, run_list_mrs,
|
||||||
run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, run_token_set, run_token_show,
|
run_me, run_related, run_search, run_show_issue, run_show_mr, run_stats, run_sync,
|
||||||
run_who,
|
run_sync_status, run_timeline, run_token_set, run_token_show, run_who,
|
||||||
};
|
};
|
||||||
use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme};
|
use lore::cli::render::{ColorMode, GlyphMode, Icons, LoreRenderer, Theme};
|
||||||
use lore::cli::robot::{RobotMeta, strip_schemas};
|
use lore::cli::robot::{RobotMeta, strip_schemas};
|
||||||
@@ -264,6 +264,7 @@ async fn main() {
|
|||||||
Some(Commands::Version) => handle_version(robot_mode),
|
Some(Commands::Version) => handle_version(robot_mode),
|
||||||
Some(Commands::Completions { shell }) => handle_completions(&shell),
|
Some(Commands::Completions { shell }) => handle_completions(&shell),
|
||||||
Some(Commands::Init {
|
Some(Commands::Init {
|
||||||
|
refresh,
|
||||||
force,
|
force,
|
||||||
non_interactive,
|
non_interactive,
|
||||||
gitlab_url,
|
gitlab_url,
|
||||||
@@ -273,6 +274,7 @@ async fn main() {
|
|||||||
}) => {
|
}) => {
|
||||||
handle_init(
|
handle_init(
|
||||||
cli.config.as_deref(),
|
cli.config.as_deref(),
|
||||||
|
refresh,
|
||||||
force,
|
force,
|
||||||
non_interactive,
|
non_interactive,
|
||||||
robot_mode,
|
robot_mode,
|
||||||
@@ -1314,9 +1316,197 @@ fn print_init_json(result: &InitResult) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Refresh JSON types ──
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct RefreshOutput {
|
||||||
|
ok: bool,
|
||||||
|
data: RefreshOutputData,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct RefreshOutputData {
|
||||||
|
mode: &'static str,
|
||||||
|
user: InitOutputUser,
|
||||||
|
projects_registered: Vec<InitOutputProject>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
projects_failed: Vec<RefreshOutputFailure>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
orphans_found: Vec<String>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
orphans_deleted: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct RefreshOutputFailure {
|
||||||
|
path: String,
|
||||||
|
error: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_refresh_json(result: &RefreshResult) {
|
||||||
|
let output = RefreshOutput {
|
||||||
|
ok: true,
|
||||||
|
data: RefreshOutputData {
|
||||||
|
mode: "refresh",
|
||||||
|
user: InitOutputUser {
|
||||||
|
username: result.user.username.clone(),
|
||||||
|
name: result.user.name.clone(),
|
||||||
|
},
|
||||||
|
projects_registered: result
|
||||||
|
.projects_registered
|
||||||
|
.iter()
|
||||||
|
.map(|p| InitOutputProject {
|
||||||
|
path: p.path.clone(),
|
||||||
|
name: p.name.clone(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
projects_failed: result
|
||||||
|
.projects_failed
|
||||||
|
.iter()
|
||||||
|
.map(|p| RefreshOutputFailure {
|
||||||
|
path: p.path.clone(),
|
||||||
|
error: p.error.clone(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
orphans_found: result.orphans_found.clone(),
|
||||||
|
orphans_deleted: result.orphans_deleted.clone(),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
serde_json::to_string(&output).unwrap_or_else(|e| {
|
||||||
|
format!(r#"{{"ok":false,"error":{{"code":"INTERNAL_ERROR","message":"JSON serialization failed: {e}"}}}}"#)
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_init_refresh(
|
||||||
|
config_override: Option<&str>,
|
||||||
|
non_interactive: bool,
|
||||||
|
robot_mode: bool,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let mut result = run_init_refresh(RefreshOptions {
|
||||||
|
config_path: config_override.map(String::from),
|
||||||
|
non_interactive,
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Handle orphan deletion prompt (interactive only)
|
||||||
|
let mut orphans_deleted: Vec<String> = Vec::new();
|
||||||
|
if !result.orphans_found.is_empty() && !robot_mode && !non_interactive {
|
||||||
|
println!(
|
||||||
|
"\n{}",
|
||||||
|
Theme::warning().render(&format!(
|
||||||
|
"Found {} orphan project{} in database (not in config):",
|
||||||
|
result.orphans_found.len(),
|
||||||
|
if result.orphans_found.len() == 1 {
|
||||||
|
""
|
||||||
|
} else {
|
||||||
|
"s"
|
||||||
|
}
|
||||||
|
))
|
||||||
|
);
|
||||||
|
for orphan in &result.orphans_found {
|
||||||
|
println!(" {}", Theme::muted().render(&format!("• {orphan}")));
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let confirm = Confirm::new()
|
||||||
|
.with_prompt(format!(
|
||||||
|
"Delete {} orphan project{} from database?",
|
||||||
|
result.orphans_found.len(),
|
||||||
|
if result.orphans_found.len() == 1 {
|
||||||
|
""
|
||||||
|
} else {
|
||||||
|
"s"
|
||||||
|
}
|
||||||
|
))
|
||||||
|
.default(false)
|
||||||
|
.interact()?;
|
||||||
|
|
||||||
|
if confirm {
|
||||||
|
let deleted = delete_orphan_projects(config_override, &result.orphans_found)?;
|
||||||
|
orphans_deleted = result.orphans_found.clone();
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
Theme::success().render(&format!(" Deleted {deleted} orphan project(s)"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.orphans_deleted = orphans_deleted;
|
||||||
|
|
||||||
|
if robot_mode {
|
||||||
|
print_refresh_json(&result);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Human output
|
||||||
|
println!(
|
||||||
|
"\n{}",
|
||||||
|
Theme::success().render(&format!(
|
||||||
|
"\u{2713} Authenticated as @{} ({})",
|
||||||
|
result.user.username, result.user.name
|
||||||
|
))
|
||||||
|
);
|
||||||
|
|
||||||
|
if !result.projects_registered.is_empty() {
|
||||||
|
println!("\n {}", Theme::bold().render("Projects"));
|
||||||
|
for project in &result.projects_registered {
|
||||||
|
println!(
|
||||||
|
" {} {:<40} registered",
|
||||||
|
Theme::success().render("\u{2713}"),
|
||||||
|
project.path
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !result.projects_failed.is_empty() {
|
||||||
|
for failure in &result.projects_failed {
|
||||||
|
println!(
|
||||||
|
" {} {:<40} {}",
|
||||||
|
Theme::error().render("\u{2717}"),
|
||||||
|
failure.path,
|
||||||
|
failure.error
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Summary
|
||||||
|
let registered = result.projects_registered.len();
|
||||||
|
let failed = result.projects_failed.len();
|
||||||
|
let orphans_kept = result.orphans_found.len() - result.orphans_deleted.len();
|
||||||
|
|
||||||
|
let mut summary_parts: Vec<String> = Vec::new();
|
||||||
|
summary_parts.push(format!(
|
||||||
|
"{} project{} registered",
|
||||||
|
registered,
|
||||||
|
if registered == 1 { "" } else { "s" }
|
||||||
|
));
|
||||||
|
if failed > 0 {
|
||||||
|
summary_parts.push(format!("{failed} failed"));
|
||||||
|
}
|
||||||
|
if !result.orphans_deleted.is_empty() {
|
||||||
|
summary_parts.push(format!(
|
||||||
|
"{} orphan(s) deleted",
|
||||||
|
result.orphans_deleted.len()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if orphans_kept > 0 {
|
||||||
|
summary_parts.push(format!("{orphans_kept} orphan(s) kept"));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"\n{}",
|
||||||
|
Theme::info().render(&format!(" {}", summary_parts.join(", ")))
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
async fn handle_init(
|
async fn handle_init(
|
||||||
config_override: Option<&str>,
|
config_override: Option<&str>,
|
||||||
|
refresh: bool,
|
||||||
force: bool,
|
force: bool,
|
||||||
non_interactive: bool,
|
non_interactive: bool,
|
||||||
robot_mode: bool,
|
robot_mode: bool,
|
||||||
@@ -1325,6 +1515,11 @@ async fn handle_init(
|
|||||||
projects_flag: Option<String>,
|
projects_flag: Option<String>,
|
||||||
default_project_flag: Option<String>,
|
default_project_flag: Option<String>,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
// ── Handle --refresh mode ──
|
||||||
|
if refresh {
|
||||||
|
return handle_init_refresh(config_override, non_interactive, robot_mode).await;
|
||||||
|
}
|
||||||
|
|
||||||
if robot_mode {
|
if robot_mode {
|
||||||
let missing: Vec<&str> = [
|
let missing: Vec<&str> = [
|
||||||
gitlab_url_flag.is_none().then_some("--gitlab-url"),
|
gitlab_url_flag.is_none().then_some("--gitlab-url"),
|
||||||
@@ -1383,18 +1578,36 @@ async fn handle_init(
|
|||||||
eprintln!(
|
eprintln!(
|
||||||
"{}",
|
"{}",
|
||||||
Theme::error().render(&format!(
|
Theme::error().render(&format!(
|
||||||
"Config file exists at {}. Use --force to overwrite.",
|
"Config already exists at {}",
|
||||||
config_path.display()
|
config_path.display()
|
||||||
))
|
))
|
||||||
);
|
);
|
||||||
|
eprintln!(
|
||||||
|
"{}",
|
||||||
|
Theme::info().render(" • Use --refresh to register new projects from config")
|
||||||
|
);
|
||||||
|
eprintln!(
|
||||||
|
"{}",
|
||||||
|
Theme::info().render(" • Use --force to overwrite the config file")
|
||||||
|
);
|
||||||
std::process::exit(2);
|
std::process::exit(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
let confirm = Confirm::new()
|
println!(
|
||||||
.with_prompt(format!(
|
"{}",
|
||||||
"Config file exists at {}. Overwrite?",
|
Theme::warning().render(&format!(
|
||||||
|
"Config already exists at {}",
|
||||||
config_path.display()
|
config_path.display()
|
||||||
))
|
))
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
Theme::info().render(" • Use --refresh to register new projects from config")
|
||||||
|
);
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let confirm = Confirm::new()
|
||||||
|
.with_prompt("Overwrite existing config?")
|
||||||
.default(false)
|
.default(false)
|
||||||
.interact()?;
|
.interact()?;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user