refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from the types)
- Implementation context (the "how" is clear from the code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -1,5 +1,3 @@
//! Generate searchable documents from ingested GitLab data.
use console::style;
use rusqlite::Connection;
use serde::Serialize;
@@ -14,7 +12,6 @@ use crate::documents::{SourceType, regenerate_dirty_documents};
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
/// Result of a generate-docs run.
#[derive(Debug, Default)]
pub struct GenerateDocsResult {
pub regenerated: usize,
@@ -24,12 +21,6 @@ pub struct GenerateDocsResult {
pub full_mode: bool,
}
/// Run the generate-docs pipeline.
///
/// Default mode: process only existing dirty_sources entries.
/// Full mode: seed dirty_sources with ALL entities, then drain.
///
/// `progress_callback` reports `(processed, estimated_total)` as documents are generated.
pub fn run_generate_docs(
config: &Config,
full: bool,
@@ -56,7 +47,6 @@ pub fn run_generate_docs(
result.errored = regen.errored;
if full {
// Optimize FTS index after bulk rebuild
let _ = conn.execute(
"INSERT INTO documents_fts(documents_fts) VALUES('optimize')",
[],
@@ -67,7 +57,6 @@ pub fn run_generate_docs(
Ok(result)
}
/// Seed dirty_sources with all entities of the given type using keyset pagination.
fn seed_dirty(
conn: &Connection,
source_type: SourceType,
@@ -113,7 +102,6 @@ fn seed_dirty(
break;
}
// Advance keyset cursor to the max id within the chunk window
let max_id: i64 = conn.query_row(
&format!(
"SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)",
@@ -136,7 +124,6 @@ fn seed_dirty(
Ok(total_seeded)
}
/// Print human-readable output.
pub fn print_generate_docs(result: &GenerateDocsResult) {
let mode = if result.full_mode {
"full"
@@ -159,7 +146,6 @@ pub fn print_generate_docs(result: &GenerateDocsResult) {
}
}
/// JSON output structures.
#[derive(Serialize)]
struct GenerateDocsJsonOutput {
ok: bool,
@@ -176,7 +162,6 @@ struct GenerateDocsJsonData {
errored: usize,
}
/// Print JSON robot-mode output.
pub fn print_generate_docs_json(result: &GenerateDocsResult) {
let output = GenerateDocsJsonOutput {
ok: true,