refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (`//!` lines) and excessive inline doc comments that were duplicating information already evident from:

- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:

- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints), not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable.

Retains comments for:

- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions
--- a/src/ingestion/issues.rs
+++ b/src/ingestion/issues.rs
@@ -1,12 +1,3 @@
-//! Issue ingestion with cursor-based incremental sync.
-//!
-//! Fetches issues from GitLab and stores them locally with:
-//! - Cursor-based pagination for incremental sync
-//! - Raw payload storage with deduplication
-//! - Label extraction and stale-link removal
-//! - Milestone normalization with dedicated table
-//! - Tracking of issues needing discussion sync
-
 use std::ops::Deref;

 use futures::StreamExt;
@@ -23,7 +14,6 @@ use crate::gitlab::transformers::{MilestoneRow, transform_issue};
 use crate::gitlab::types::GitLabIssue;
 use crate::ingestion::dirty_tracker;

-/// Result of issue ingestion.
 #[derive(Debug, Default)]
 pub struct IngestIssuesResult {
    pub fetched: usize,
@@ -32,36 +22,31 @@ pub struct IngestIssuesResult {
    pub issues_needing_discussion_sync: Vec<IssueForDiscussionSync>,
 }

-/// Issue that needs discussion sync.
 #[derive(Debug, Clone)]
 pub struct IssueForDiscussionSync {
    pub local_issue_id: i64,
    pub iid: i64,
-    pub updated_at: i64, // ms epoch
+    pub updated_at: i64,
 }

-/// Cursor state for incremental sync.
 #[derive(Debug, Default)]
 struct SyncCursor {
    updated_at_cursor: Option<i64>,
    tie_breaker_id: Option<i64>,
 }

-/// Ingest issues for a project.
 pub async fn ingest_issues(
    conn: &Connection,
    client: &GitLabClient,
    config: &Config,
-    project_id: i64,        // Local DB project ID
-    gitlab_project_id: i64, // GitLab project ID
+    project_id: i64,
+    gitlab_project_id: i64,
 ) -> Result<IngestIssuesResult> {
    let mut result = IngestIssuesResult::default();

-    // 1. Get current cursor
    let cursor = get_sync_cursor(conn, project_id)?;
    debug!(?cursor, "Starting issue ingestion with cursor");

-    // 2. Stream issues with cursor rewind
    let mut issues_stream = client.paginate_issues(
        gitlab_project_id,
        cursor.updated_at_cursor,
@@ -72,12 +57,10 @@ pub async fn ingest_issues(
    let mut last_updated_at: Option<i64> = None;
    let mut last_gitlab_id: Option<i64> = None;

-    // 3. Process each issue
    while let Some(issue_result) = issues_stream.next().await {
        let issue = issue_result?;
        result.fetched += 1;

-        // Parse timestamp early - skip issues with invalid timestamps
        let issue_updated_at = match parse_timestamp(&issue.updated_at) {
            Ok(ts) => ts,
            Err(e) => {
@@ -90,23 +73,19 @@ pub async fn ingest_issues(
            }
        };

-        // Apply local cursor filter (skip already-processed due to rewind overlap)
        if !passes_cursor_filter_with_ts(issue.id, issue_updated_at, &cursor) {
            debug!(gitlab_id = issue.id, "Skipping already-processed issue");
            continue;
        }

-        // Transform and store
        let labels_created = process_single_issue(conn, config, project_id, &issue)?;
        result.upserted += 1;
        result.labels_created += labels_created;

-        // Track cursor position (use already-parsed timestamp)
        last_updated_at = Some(issue_updated_at);
        last_gitlab_id = Some(issue.id);
        batch_count += 1;

-        // Incremental cursor update every 100 issues
        if batch_count % 100 == 0
            && let (Some(ts), Some(id)) = (last_updated_at, last_gitlab_id)
        {
@@ -115,17 +94,12 @@ pub async fn ingest_issues(
        }
    }

-    // 4. Final cursor update
    if let (Some(ts), Some(id)) = (last_updated_at, last_gitlab_id) {
        update_sync_cursor(conn, project_id, ts, id)?;
    } else if result.fetched == 0 && cursor.updated_at_cursor.is_some() {
-        // No new issues returned, but we have an existing cursor.
-        // Update sync_attempted_at to track that we checked (useful for monitoring)
-        // The cursor itself stays the same since there's nothing newer to advance to.
        debug!("No new issues found, cursor unchanged");
    }

-    // 5. Find issues needing discussion sync
    result.issues_needing_discussion_sync = get_issues_needing_discussion_sync(conn, project_id)?;

    info!(
@@ -139,11 +113,9 @@ pub async fn ingest_issues(
    Ok(result)
 }

-/// Check if an issue passes the cursor filter (not already processed).
-/// Takes pre-parsed timestamp to avoid redundant parsing.
 fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCursor) -> bool {
    let Some(cursor_ts) = cursor.updated_at_cursor else {
-        return true; // No cursor = fetch all
+        return true;
    };

    if issue_ts < cursor_ts {
@@ -160,12 +132,10 @@ fn passes_cursor_filter_with_ts(gitlab_id: i64, issue_ts: i64, cursor: &SyncCurs
    true
 }

-// Keep the original function for backward compatibility with tests
-/// Check if an issue passes the cursor filter (not already processed).
 #[cfg(test)]
 fn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> Result<bool> {
    let Some(cursor_ts) = cursor.updated_at_cursor else {
-        return Ok(true); // No cursor = fetch all
+        return Ok(true);
    };

    let issue_ts = parse_timestamp(&issue.updated_at)?;
@@ -185,8 +155,6 @@ fn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> Result<bool
    Ok(true)
 }

-/// Process a single issue: store payload, upsert issue, handle labels.
-/// All operations are wrapped in a transaction for atomicity.
 fn process_single_issue(
    conn: &Connection,
    config: &Config,
@@ -195,12 +163,10 @@ fn process_single_issue(
 ) -> Result<usize> {
    let now = now_ms();

-    // Transform issue first (outside transaction - no DB access)
    let payload_bytes = serde_json::to_vec(issue)?;
    let transformed = transform_issue(issue)?;
    let issue_row = &transformed.issue;

-    // Wrap all DB operations in a transaction for atomicity
    let tx = conn.unchecked_transaction()?;
    let labels_created = process_issue_in_transaction(
        &tx,
@@ -219,7 +185,6 @@ fn process_single_issue(
    Ok(labels_created)
 }

-/// Inner function that performs all DB operations within a transaction.
 #[allow(clippy::too_many_arguments)]
 fn process_issue_in_transaction(
    tx: &Transaction<'_>,
@@ -235,7 +200,6 @@ fn process_issue_in_transaction(
 ) -> Result<usize> {
    let mut labels_created = 0;

-    // Store raw payload (deref Transaction to Connection for store_payload)
    let payload_id = store_payload(
        tx.deref(),
        StorePayloadOptions {
@@ -247,14 +211,12 @@ fn process_issue_in_transaction(
        },
    )?;

-    // Upsert milestone if present, get local ID
    let milestone_id: Option<i64> = if let Some(m) = milestone {
        Some(upsert_milestone_tx(tx, project_id, m)?)
    } else {
        None
    };

-    // Upsert issue (including new fields: due_date, milestone_id, milestone_title)
    tx.execute(
        "INSERT INTO issues (
            gitlab_id, project_id, iid, title, description, state,
@@ -292,35 +254,29 @@ fn process_issue_in_transaction(
        ),
    )?;

-    // Get local issue ID
    let local_issue_id: i64 = tx.query_row(
        "SELECT id FROM issues WHERE project_id = ? AND iid = ?",
        (project_id, issue_row.iid),
        |row| row.get(0),
    )?;

-    // Mark dirty for document regeneration (inside transaction)
    dirty_tracker::mark_dirty_tx(tx, SourceType::Issue, local_issue_id)?;

-    // Clear existing label links (stale removal)
    tx.execute(
        "DELETE FROM issue_labels WHERE issue_id = ?",
        [local_issue_id],
    )?;

-    // Upsert labels and create links
    for label_name in label_names {
        let label_id = upsert_label_tx(tx, project_id, label_name, &mut labels_created)?;
        link_issue_label_tx(tx, local_issue_id, label_id)?;
    }

-    // Clear existing assignee links (stale removal)
    tx.execute(
        "DELETE FROM issue_assignees WHERE issue_id = ?",
        [local_issue_id],
    )?;

-    // Insert assignees
    for username in assignee_usernames {
        tx.execute(
            "INSERT OR IGNORE INTO issue_assignees (issue_id, username) VALUES (?, ?)",
@@ -331,8 +287,6 @@ fn process_issue_in_transaction(
    Ok(labels_created)
 }

-/// Upsert a label within a transaction, returning its ID.
-/// Uses INSERT...ON CONFLICT...RETURNING for a single round-trip.
 fn upsert_label_tx(
    tx: &Transaction<'_>,
    project_id: i64,
@@ -347,7 +301,6 @@ fn upsert_label_tx(
        |row| row.get(0),
    )?;

-    // If the rowid matches last_insert_rowid, this was a new insert
    if tx.last_insert_rowid() == id {
        *created_count += 1;
    }
@@ -355,7 +308,6 @@ fn upsert_label_tx(
    Ok(id)
 }

-/// Link an issue to a label within a transaction.
 fn link_issue_label_tx(tx: &Transaction<'_>, issue_id: i64, label_id: i64) -> Result<()> {
    tx.execute(
        "INSERT OR IGNORE INTO issue_labels (issue_id, label_id) VALUES (?, ?)",
@@ -364,8 +316,6 @@ fn link_issue_label_tx(tx: &Transaction<'_>, issue_id: i64, label_id: i64) -> Re
    Ok(())
 }

-/// Upsert a milestone within a transaction, returning its local ID.
-/// Uses RETURNING to avoid a separate SELECT round-trip.
 fn upsert_milestone_tx(
    tx: &Transaction<'_>,
    project_id: i64,
@@ -398,7 +348,6 @@ fn upsert_milestone_tx(
    Ok(local_id)
 }

-/// Get the current sync cursor for issues.
 fn get_sync_cursor(conn: &Connection, project_id: i64) -> Result<SyncCursor> {
    let row: Option<(Option<i64>, Option<i64>)> = conn
        .query_row(
@@ -418,7 +367,6 @@ fn get_sync_cursor(conn: &Connection, project_id: i64) -> Result<SyncCursor> {
    })
 }

-/// Update the sync cursor.
 fn update_sync_cursor(
    conn: &Connection,
    project_id: i64,
@@ -436,7 +384,6 @@ fn update_sync_cursor(
    Ok(())
 }

-/// Get issues that need discussion sync (updated_at > discussions_synced_for_updated_at).
 fn get_issues_needing_discussion_sync(
    conn: &Connection,
    project_id: i64,
@@ -460,8 +407,6 @@ fn get_issues_needing_discussion_sync(
    Ok(issues?)
 }

-/// Parse ISO 8601 timestamp to milliseconds.
-/// Returns an error if parsing fails instead of silently returning 0.
 fn parse_timestamp(ts: &str) -> Result<i64> {
    chrono::DateTime::parse_from_rfc3339(ts)
        .map(|dt| dt.timestamp_millis())
@@ -500,11 +445,10 @@ mod tests {
    #[test]
    fn cursor_filter_allows_newer_issues() {
        let cursor = SyncCursor {
-            updated_at_cursor: Some(1705312800000), // 2024-01-15T10:00:00Z
+            updated_at_cursor: Some(1705312800000),
            tie_breaker_id: Some(100),
        };

-        // Issue with later timestamp passes
        let issue = make_test_issue(101, "2024-01-16T10:00:00.000Z");
        assert!(passes_cursor_filter(&issue, &cursor).unwrap_or(false));
    }
@@ -516,7 +460,6 @@ mod tests {
            tie_breaker_id: Some(100),
        };

-        // Issue with earlier timestamp blocked
        let issue = make_test_issue(99, "2024-01-14T10:00:00.000Z");
        assert!(!passes_cursor_filter(&issue, &cursor).unwrap_or(true));
    }
@@ -528,15 +471,12 @@ mod tests {
            tie_breaker_id: Some(100),
        };

-        // Same timestamp, higher ID passes
        let issue1 = make_test_issue(101, "2024-01-15T10:00:00.000Z");
        assert!(passes_cursor_filter(&issue1, &cursor).unwrap_or(false));

-        // Same timestamp, same ID blocked
        let issue2 = make_test_issue(100, "2024-01-15T10:00:00.000Z");
        assert!(!passes_cursor_filter(&issue2, &cursor).unwrap_or(true));

-        // Same timestamp, lower ID blocked
        let issue3 = make_test_issue(99, "2024-01-15T10:00:00.000Z");
        assert!(!passes_cursor_filter(&issue3, &cursor).unwrap_or(true));
    }