refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc comments that were duplicating information already evident from:

- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:

- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints) not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable.

Retains comments for:

- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions
--- a/src/embedding/change_detector.rs
+++ b/src/embedding/change_detector.rs
@@ -1,11 +1,8 @@
-//! Detect documents needing (re-)embedding based on content hash changes.
-
 use rusqlite::Connection;

 use crate::core::error::Result;
 use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS};

-/// A document that needs embedding or re-embedding.
 #[derive(Debug)]
 pub struct PendingDocument {
    pub document_id: i64,
@@ -13,20 +10,12 @@ pub struct PendingDocument {
    pub content_hash: String,
 }

-/// Find documents that need embedding: new (no metadata), changed (hash mismatch),
-/// or config-drifted (chunk_max_bytes/model/dims mismatch).
-///
-/// Uses keyset pagination (WHERE d.id > last_id) and returns up to `page_size` results.
 pub fn find_pending_documents(
    conn: &Connection,
    page_size: usize,
    last_id: i64,
    model_name: &str,
 ) -> Result<Vec<PendingDocument>> {
-    // Documents that either:
-    // 1. Have no embedding_metadata at all (new)
-    // 2. Have metadata where document_hash != content_hash (changed)
-    // 3. Config drift: chunk_max_bytes, model, or dims mismatch (or pre-migration NULL)
    let sql = r#"
        SELECT d.id, d.content_text, d.content_hash
        FROM documents d
@@ -79,7 +68,6 @@ pub fn find_pending_documents(
    Ok(rows)
 }

-/// Count total documents that need embedding.
 pub fn count_pending_documents(conn: &Connection, model_name: &str) -> Result<i64> {
    let count: i64 = conn.query_row(
        r#"