refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (`//!` lines), redundant item-level
`///` doc comments, and inline `//` section markers that duplicated
information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -1,5 +1,3 @@
//! Stats command: document counts, embedding coverage, queue status, integrity checks.
use console::style;
use rusqlite::Connection;
use serde::Serialize;
@@ -9,7 +7,6 @@ use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
/// Result of the stats command.
#[derive(Debug, Default, Serialize)]
pub struct StatsResult {
pub documents: DocumentStats,
@@ -74,14 +71,12 @@ pub struct RepairResult {
pub stale_cleared: i64,
}
/// Run the stats command.
pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let mut result = StatsResult::default();
// Document counts
result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
result.documents.issues = count_query(
&conn,
@@ -100,7 +95,6 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
"SELECT COUNT(*) FROM documents WHERE is_truncated = 1",
)?;
// Embedding stats — skip gracefully if table doesn't exist (Gate A only)
if table_exists(&conn, "embedding_metadata") {
let embedded = count_query(
&conn,
@@ -119,10 +113,8 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
};
}
// FTS stats
result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;
// Queue stats
result.queues.dirty_sources = count_query(
&conn,
"SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL",
@@ -158,15 +150,12 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
)?;
}
// Integrity check
#[allow(clippy::field_reassign_with_default)]
if check {
let mut integrity = IntegrityResult::default();
// FTS/doc count mismatch
integrity.fts_doc_mismatch = result.fts.indexed != result.documents.total;
// Orphan embeddings (rowid/1000 should match a document ID)
if table_exists(&conn, "embeddings") {
integrity.orphan_embeddings = count_query(
&conn,
@@ -175,7 +164,6 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
)?;
}
// Stale metadata (document_hash != current content_hash)
if table_exists(&conn, "embedding_metadata") {
integrity.stale_metadata = count_query(
&conn,
@@ -185,7 +173,6 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
)?;
}
// Orphaned resource events (FK targets missing)
if table_exists(&conn, "resource_state_events") {
integrity.orphan_state_events = count_query(
&conn,
@@ -211,7 +198,6 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
)?;
}
// Queue health: stuck locks and max retry attempts
if table_exists(&conn, "pending_dependent_fetches") {
integrity.queue_stuck_locks = count_query(
&conn,
@@ -232,7 +218,6 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
&& integrity.stale_metadata == 0
&& orphan_events == 0;
// Repair
if repair {
let mut repair_result = RepairResult::default();
@@ -252,7 +237,6 @@ pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResu
)?;
repair_result.orphans_deleted = deleted as i64;
// Also clean orphaned vectors if vec0 table exists
if table_exists(&conn, "embeddings") {
let _ = conn.execute(
"DELETE FROM embeddings
@@ -299,7 +283,6 @@ fn table_exists(conn: &Connection, table: &str) -> bool {
> 0
}
/// Print human-readable stats.
pub fn print_stats(result: &StatsResult) {
println!("{}", style("Documents").cyan().bold());
println!(" Total: {}", result.documents.total);
@@ -429,14 +412,12 @@ pub fn print_stats(result: &StatsResult) {
}
}
/// JSON output structures.
#[derive(Serialize)]
struct StatsJsonOutput {
ok: bool,
data: StatsResult,
}
/// Print JSON robot-mode output.
pub fn print_stats_json(result: &StatsResult) {
let output = StatsJsonOutput {
ok: true,