refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why"
(business decisions, non-obvious constraints) not "what" (which the code
itself shows). This change reduces noise and maintenance burden while
keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions
--- a/src/cli/commands/embed.rs
+++ b/src/cli/commands/embed.rs
@@ -1,5 +1,3 @@
-//! Embed command: generate vector embeddings for documents via Ollama.
-
 use console::style;
 use serde::Serialize;

@@ -10,7 +8,6 @@ use crate::core::paths::get_db_path;
 use crate::embedding::ollama::{OllamaClient, OllamaConfig};
 use crate::embedding::pipeline::embed_documents;

-/// Result of the embed command.
 #[derive(Debug, Default, Serialize)]
 pub struct EmbedCommandResult {
    pub embedded: usize,
@@ -18,9 +15,6 @@ pub struct EmbedCommandResult {
    pub skipped: usize,
 }

-/// Run the embed command.
-///
-/// `progress_callback` reports `(processed, total)` as documents are embedded.
 pub async fn run_embed(
    config: &Config,
    full: bool,
@@ -30,7 +24,6 @@ pub async fn run_embed(
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

-    // Build Ollama config from user settings
    let ollama_config = OllamaConfig {
        base_url: config.embedding.base_url.clone(),
        model: config.embedding.model.clone(),
@@ -38,13 +31,9 @@ pub async fn run_embed(
    };
    let client = OllamaClient::new(ollama_config);

-    // Health check — fail fast if Ollama is down or model missing
    client.health_check().await?;

    if full {
-        // Clear ALL embeddings and metadata atomically for a complete re-embed.
-        // Wrapped in a transaction so a crash between the two DELETEs can't
-        // leave orphaned data.
        conn.execute_batch(
            "BEGIN;
             DELETE FROM embedding_metadata;
@@ -52,7 +41,6 @@ pub async fn run_embed(
             COMMIT;",
        )?;
    } else if retry_failed {
-        // Clear errors so they become pending again
        conn.execute(
            "UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0
             WHERE last_error IS NOT NULL",
@@ -70,7 +58,6 @@ pub async fn run_embed(
    })
 }

-/// Print human-readable output.
 pub fn print_embed(result: &EmbedCommandResult) {
    println!("{} Embedding complete", style("done").green().bold(),);
    println!("  Embedded: {}", result.embedded);
@@ -82,14 +69,12 @@ pub fn print_embed(result: &EmbedCommandResult) {
    }
 }

-/// JSON output.
 #[derive(Serialize)]
 struct EmbedJsonOutput<'a> {
    ok: bool,
    data: &'a EmbedCommandResult,
 }

-/// Print JSON robot-mode output.
 pub fn print_embed_json(result: &EmbedCommandResult) {
    let output = EmbedJsonOutput {
        ok: true,