refactor: Remove redundant doc comments throughout codebase
Removes module-level doc comments (//! lines) and excessive inline doc comments
that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why"
(business decisions, non-obvious constraints) not "what" (which the code
itself shows). This change reduces noise and maintenance burden while
keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,12 +1,3 @@
|
||||
//! Issue ingestion with cursor-based incremental sync.
|
||||
//!
|
||||
//! Fetches issues from GitLab and stores them locally with:
|
||||
//! - Cursor-based pagination for incremental sync
|
||||
//! - Raw payload storage with deduplication
|
||||
//! - Label extraction and stale-link removal
|
||||
//! - Milestone normalization with dedicated table
|
||||
//! - Tracking of issues needing discussion sync
|
||||
|
||||
use std::ops::Deref;
|
||||
|
||||
use futures::StreamExt;
|
||||
@@ -23,7 +14,6 @@ use crate::gitlab::transformers::{MilestoneRow, transform_issue};
|
||||
use crate::gitlab::types::GitLabIssue;
|
||||
use crate::ingestion::dirty_tracker;
|
||||
|
||||
/// Result of issue ingestion.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct IngestIssuesResult {
|
||||
pub fetched: usize,
|
||||
@@ -32,36 +22,31 @@ pub struct IngestIssuesResult {
|
||||
pub issues_needing_discussion_sync: Vec<IssueForDiscussionSync>,
|
||||
}
|
||||
|
||||
/// Issue that needs discussion sync.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IssueForDiscussionSync {
|
||||
pub local_issue_id: i64,
|
||||
pub iid: i64,
|
||||
pub updated_at: i64, // ms epoch
|
||||
pub updated_at: i64,
|
||||
}
|
||||
|
||||
/// Cursor state for incremental sync.
|
||||
#[derive(Debug, Default)]
|
||||
struct SyncCursor {
|
||||
updated_at_cursor: Option<i64>,
|
||||
tie_breaker_id: Option<i64>,
|
||||
}
|
||||
|
||||
/// Ingest issues for a project.
|
||||
pub async fn ingest_issues(
|
||||
conn: &Connection,
|
||||
client: &GitLabClient,
|
||||
config: &Config,
|
||||
project_id: i64, // Local DB project ID
|
||||
gitlab_project_id: i64, // GitLab project ID
|
||||
project_id: i64,
|
||||
gitlab_project_id: i64,
|
||||
) -> Result<IngestIssuesResult> {
|
||||
let mut result = IngestIssuesResult::default();
|
||||
|
||||
// 1. Get current cursor
|
||||
let cursor = get_sync_cursor(conn, project_id)?;
|
||||
debug!(?cursor, "Starting issue ingestion with cursor");
|
||||
|
||||
// 2. Stream issues with cursor rewind
|
||||
let mut issues_stream = client.paginate_issues(
|
||||
gitlab_project_id,
|
||||
cursor.updated_at_cursor,
|
||||
@@ -72,12 +57,10 @@ pub async fn ingest_issues(
|
||||
let mut last_updated_at: Option<i64> = None;
|
||||
let mut last_gitlab_id: Option<i64> = None;
|
||||
|
||||
// 3. Process each issue
|
||||
while let Some(issue_result) = issues_stream.next().await {
|
||||
let issue = issue_result?;
|
||||
result.fetched += 1;
|
||||
|
||||
// Parse timestamp early - skip issues with invalid timestamps
|
||||
let issue_updated_at = match parse_timestamp(&issue.updated_at) {
|
||||
Ok(ts) => ts,
|
||||
Err(e) => {
|
||||
@@ -90,23 +73,19 @@ pub async fn ingest_issues(
|
||||
}
|
||||
};
|
||||
|
||||
// Apply local cursor filter (skip already-processed due to rewind overlap)
|
||||
if !passes_cursor_filter_with_ts(issue.id, issue_updated_at, &cursor) {
|
||||
debug!(gitlab_id = issue.id, "Skipping already-processed issue");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Transform and store
|
||||
let labels_created = process_single_issue(conn, config, project_id, &issue)?;
|
||||
result.upserted += 1;
|
||||
result.labels_created += labels_created;
|
||||
|
||||
// Track cursor position (use already-parsed timestamp)
|
||||
last_updated_at = Some(issue_updated_at);
|
||||
last_gitlab_id = Some(issue.id);
|
||||
batch_count += 1;
|
||||
|
||||
// Incremental cursor update every 100 issues
|
||||
if batch_count % 100 == 0
|
||||
&& let (Some(ts), Some(id)) = (last_updated_at, last_gitlab_id)
|
||||
{
|
||||
@@ -115,17 +94,12 @@ pub async fn ingest_issues(
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Final cursor update
|
||||
if let (Some(ts), Some(id)) = (last_updated_at, last_gitlab_id) {
|
||||
update_sync_cursor(conn, project_id, ts, id)?;
|
||||
} else if result.fetched == 0 && cursor.updated_at_cursor.is_some() {
|
||||
// No new issues returned, but we have an existing cursor.
|
||||
// Update sync_attempted_at to track that we checked (useful for monitoring)
|
||||
// The cursor itself stays the same since there's nothing newer to advance to.
|
||||
debug!("No new issues found, cursor unchanged");
|
||||
}
|
||||
|
||||
// 5. Find issues needing discussion sync
|
||||
result.issues_needing_discussion_sync = get_issues_needing_discussion_sync(conn, project_id)?;
|
||||
|
||||
info!(
|
||||
@@ -139,11 +113,9 @@ pub async fn ingest_issues(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Check if an issue passes the cursor filter (not already processed).
|
||||
/// Takes pre-parsed timestamp to avoid redundant parsing.
|
||||
fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCursor) -> bool {
|
||||
let Some(cursor_ts) = cursor.updated_at_cursor else {
|
||||
return true; // No cursor = fetch all
|
||||
return true;
|
||||
};
|
||||
|
||||
if issue_ts < cursor_ts {
|
||||
@@ -160,12 +132,10 @@ fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCurs
|
||||
true
|
||||
}
|
||||
|
||||
// Keep the original function for backward compatibility with tests
|
||||
/// Check if an issue passes the cursor filter (not already processed).
|
||||
#[cfg(test)]
|
||||
fn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> Result<bool> {
|
||||
let Some(cursor_ts) = cursor.updated_at_cursor else {
|
||||
return Ok(true); // No cursor = fetch all
|
||||
return Ok(true);
|
||||
};
|
||||
|
||||
let issue_ts = parse_timestamp(&issue.updated_at)?;
|
||||
@@ -185,8 +155,6 @@ fn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> Result<bool
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Process a single issue: store payload, upsert issue, handle labels.
|
||||
/// All operations are wrapped in a transaction for atomicity.
|
||||
fn process_single_issue(
|
||||
conn: &Connection,
|
||||
config: &Config,
|
||||
@@ -195,12 +163,10 @@ fn process_single_issue(
|
||||
) -> Result<usize> {
|
||||
let now = now_ms();
|
||||
|
||||
// Transform issue first (outside transaction - no DB access)
|
||||
let payload_bytes = serde_json::to_vec(issue)?;
|
||||
let transformed = transform_issue(issue)?;
|
||||
let issue_row = &transformed.issue;
|
||||
|
||||
// Wrap all DB operations in a transaction for atomicity
|
||||
let tx = conn.unchecked_transaction()?;
|
||||
let labels_created = process_issue_in_transaction(
|
||||
&tx,
|
||||
@@ -219,7 +185,6 @@ fn process_single_issue(
|
||||
Ok(labels_created)
|
||||
}
|
||||
|
||||
/// Inner function that performs all DB operations within a transaction.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn process_issue_in_transaction(
|
||||
tx: &Transaction<'_>,
|
||||
@@ -235,7 +200,6 @@ fn process_issue_in_transaction(
|
||||
) -> Result<usize> {
|
||||
let mut labels_created = 0;
|
||||
|
||||
// Store raw payload (deref Transaction to Connection for store_payload)
|
||||
let payload_id = store_payload(
|
||||
tx.deref(),
|
||||
StorePayloadOptions {
|
||||
@@ -247,14 +211,12 @@ fn process_issue_in_transaction(
|
||||
},
|
||||
)?;
|
||||
|
||||
// Upsert milestone if present, get local ID
|
||||
let milestone_id: Option<i64> = if let Some(m) = milestone {
|
||||
Some(upsert_milestone_tx(tx, project_id, m)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Upsert issue (including new fields: due_date, milestone_id, milestone_title)
|
||||
tx.execute(
|
||||
"INSERT INTO issues (
|
||||
gitlab_id, project_id, iid, title, description, state,
|
||||
@@ -292,35 +254,29 @@ fn process_issue_in_transaction(
|
||||
),
|
||||
)?;
|
||||
|
||||
// Get local issue ID
|
||||
let local_issue_id: i64 = tx.query_row(
|
||||
"SELECT id FROM issues WHERE project_id = ? AND iid = ?",
|
||||
(project_id, issue_row.iid),
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
// Mark dirty for document regeneration (inside transaction)
|
||||
dirty_tracker::mark_dirty_tx(tx, SourceType::Issue, local_issue_id)?;
|
||||
|
||||
// Clear existing label links (stale removal)
|
||||
tx.execute(
|
||||
"DELETE FROM issue_labels WHERE issue_id = ?",
|
||||
[local_issue_id],
|
||||
)?;
|
||||
|
||||
// Upsert labels and create links
|
||||
for label_name in label_names {
|
||||
let label_id = upsert_label_tx(tx, project_id, label_name, &mut labels_created)?;
|
||||
link_issue_label_tx(tx, local_issue_id, label_id)?;
|
||||
}
|
||||
|
||||
// Clear existing assignee links (stale removal)
|
||||
tx.execute(
|
||||
"DELETE FROM issue_assignees WHERE issue_id = ?",
|
||||
[local_issue_id],
|
||||
)?;
|
||||
|
||||
// Insert assignees
|
||||
for username in assignee_usernames {
|
||||
tx.execute(
|
||||
"INSERT OR IGNORE INTO issue_assignees (issue_id, username) VALUES (?, ?)",
|
||||
@@ -331,8 +287,6 @@ fn process_issue_in_transaction(
|
||||
Ok(labels_created)
|
||||
}
|
||||
|
||||
/// Upsert a label within a transaction, returning its ID.
|
||||
/// Uses INSERT...ON CONFLICT...RETURNING for a single round-trip.
|
||||
fn upsert_label_tx(
|
||||
tx: &Transaction<'_>,
|
||||
project_id: i64,
|
||||
@@ -347,7 +301,6 @@ fn upsert_label_tx(
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
// If the rowid matches last_insert_rowid, this was a new insert
|
||||
if tx.last_insert_rowid() == id {
|
||||
*created_count += 1;
|
||||
}
|
||||
@@ -355,7 +308,6 @@ fn upsert_label_tx(
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Link an issue to a label within a transaction.
|
||||
fn link_issue_label_tx(tx: &Transaction<'_>, issue_id: i64, label_id: i64) -> Result<()> {
|
||||
tx.execute(
|
||||
"INSERT OR IGNORE INTO issue_labels (issue_id, label_id) VALUES (?, ?)",
|
||||
@@ -364,8 +316,6 @@ fn link_issue_label_tx(tx: &Transaction<'_>, issue_id: i64, label_id: i64) -> Re
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Upsert a milestone within a transaction, returning its local ID.
|
||||
/// Uses RETURNING to avoid a separate SELECT round-trip.
|
||||
fn upsert_milestone_tx(
|
||||
tx: &Transaction<'_>,
|
||||
project_id: i64,
|
||||
@@ -398,7 +348,6 @@ fn upsert_milestone_tx(
|
||||
Ok(local_id)
|
||||
}
|
||||
|
||||
/// Get the current sync cursor for issues.
|
||||
fn get_sync_cursor(conn: &Connection, project_id: i64) -> Result<SyncCursor> {
|
||||
let row: Option<(Option<i64>, Option<i64>)> = conn
|
||||
.query_row(
|
||||
@@ -418,7 +367,6 @@ fn get_sync_cursor(conn: &Connection, project_id: i64) -> Result<SyncCursor> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Update the sync cursor.
|
||||
fn update_sync_cursor(
|
||||
conn: &Connection,
|
||||
project_id: i64,
|
||||
@@ -436,7 +384,6 @@ fn update_sync_cursor(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get issues that need discussion sync (updated_at > discussions_synced_for_updated_at).
|
||||
fn get_issues_needing_discussion_sync(
|
||||
conn: &Connection,
|
||||
project_id: i64,
|
||||
@@ -460,8 +407,6 @@ fn get_issues_needing_discussion_sync(
|
||||
Ok(issues?)
|
||||
}
|
||||
|
||||
/// Parse ISO 8601 timestamp to milliseconds.
|
||||
/// Returns an error if parsing fails instead of silently returning 0.
|
||||
fn parse_timestamp(ts: &str) -> Result<i64> {
|
||||
chrono::DateTime::parse_from_rfc3339(ts)
|
||||
.map(|dt| dt.timestamp_millis())
|
||||
@@ -500,11 +445,10 @@ mod tests {
|
||||
#[test]
|
||||
fn cursor_filter_allows_newer_issues() {
|
||||
let cursor = SyncCursor {
|
||||
updated_at_cursor: Some(1705312800000), // 2024-01-15T10:00:00Z
|
||||
updated_at_cursor: Some(1705312800000),
|
||||
tie_breaker_id: Some(100),
|
||||
};
|
||||
|
||||
// Issue with later timestamp passes
|
||||
let issue = make_test_issue(101, "2024-01-16T10:00:00.000Z");
|
||||
assert!(passes_cursor_filter(&issue, &cursor).unwrap_or(false));
|
||||
}
|
||||
@@ -516,7 +460,6 @@ mod tests {
|
||||
tie_breaker_id: Some(100),
|
||||
};
|
||||
|
||||
// Issue with earlier timestamp blocked
|
||||
let issue = make_test_issue(99, "2024-01-14T10:00:00.000Z");
|
||||
assert!(!passes_cursor_filter(&issue, &cursor).unwrap_or(true));
|
||||
}
|
||||
@@ -528,15 +471,12 @@ mod tests {
|
||||
tie_breaker_id: Some(100),
|
||||
};
|
||||
|
||||
// Same timestamp, higher ID passes
|
||||
let issue1 = make_test_issue(101, "2024-01-15T10:00:00.000Z");
|
||||
assert!(passes_cursor_filter(&issue1, &cursor).unwrap_or(false));
|
||||
|
||||
// Same timestamp, same ID blocked
|
||||
let issue2 = make_test_issue(100, "2024-01-15T10:00:00.000Z");
|
||||
assert!(!passes_cursor_filter(&issue2, &cursor).unwrap_or(true));
|
||||
|
||||
// Same timestamp, lower ID blocked
|
||||
let issue3 = make_test_issue(99, "2024-01-15T10:00:00.000Z");
|
||||
assert!(!passes_cursor_filter(&issue3, &cursor).unwrap_or(true));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user