refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints) not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions
--- a/src/ingestion/dirty_tracker.rs
+++ b/src/ingestion/dirty_tracker.rs
@@ -7,8 +7,6 @@ use crate::documents::SourceType;

 const DIRTY_SOURCES_BATCH_SIZE: usize = 500;

-/// Mark a source entity as dirty INSIDE an existing transaction.
-/// ON CONFLICT resets ALL backoff/error state so fresh updates are immediately eligible.
 pub fn mark_dirty_tx(
    tx: &rusqlite::Transaction<'_>,
    source_type: SourceType,
@@ -28,7 +26,6 @@ pub fn mark_dirty_tx(
    Ok(())
 }

-/// Convenience wrapper for non-transactional contexts.
 pub fn mark_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at)
@@ -44,9 +41,6 @@ pub fn mark_dirty(conn: &Connection, source_type: SourceType, source_id: i64) ->
    Ok(())
 }

-/// Get dirty sources ready for processing.
-/// Returns entries where next_attempt_at is NULL or <= now.
-/// Orders by attempt_count ASC (fresh before failed), then queued_at ASC.
 pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
    let now = now_ms();
    let mut stmt = conn.prepare(
@@ -79,7 +73,6 @@ pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
    Ok(results)
 }

-/// Clear dirty entry after successful processing.
 pub fn clear_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
    conn.execute(
        "DELETE FROM dirty_sources WHERE source_type = ?1 AND source_id = ?2",
@@ -88,7 +81,6 @@ pub fn clear_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -
    Ok(())
 }

-/// Record an error for a dirty source, incrementing attempt_count and setting backoff.
 pub fn record_dirty_error(
    conn: &Connection,
    source_type: SourceType,
@@ -96,7 +88,6 @@ pub fn record_dirty_error(
    error: &str,
 ) -> Result<()> {
    let now = now_ms();
-    // Get current attempt_count first
    let attempt_count: i64 = conn.query_row(
        "SELECT attempt_count FROM dirty_sources WHERE source_type = ?1 AND source_id = ?2",
        rusqlite::params![source_type.as_str(), source_id],
@@ -176,7 +167,6 @@ mod tests {
    fn test_requeue_resets_backoff() {
        let conn = setup_db();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
-        // Simulate error state
        record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap();

        let attempt: i64 = conn
@@ -188,7 +178,6 @@ mod tests {
            .unwrap();
        assert_eq!(attempt, 1);

-        // Re-mark should reset
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        let attempt: i64 = conn
            .query_row(
@@ -213,7 +202,6 @@ mod tests {
    fn test_get_respects_backoff() {
        let conn = setup_db();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
-        // Set next_attempt_at far in the future
        conn.execute(
            "UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1",
            [],
@@ -227,20 +215,18 @@ mod tests {
    #[test]
    fn test_get_orders_by_attempt_count() {
        let conn = setup_db();
-        // Insert issue 1 (failed, attempt_count=2)
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        conn.execute(
            "UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1",
            [],
        )
        .unwrap();
-        // Insert issue 2 (fresh, attempt_count=0)
        mark_dirty(&conn, SourceType::Issue, 2).unwrap();

        let results = get_dirty_sources(&conn).unwrap();
        assert_eq!(results.len(), 2);
-        assert_eq!(results[0].1, 2); // Fresh first
-        assert_eq!(results[1].1, 1); // Failed second
+        assert_eq!(results[0].1, 2);
+        assert_eq!(results[1].1, 1);
    }

    #[test]