refactor: Remove redundant doc comments throughout codebase
Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:

- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:

- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why"
(business decisions, non-obvious constraints) not "what" (which the code
itself shows). This change reduces noise and maintenance burden while keeping
the codebase just as understandable.

Retains comments for:

- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
//! Generate searchable documents from ingested GitLab data.
|
||||
|
||||
use console::style;
|
||||
use rusqlite::Connection;
|
||||
use serde::Serialize;
|
||||
@@ -14,7 +12,6 @@ use crate::documents::{SourceType, regenerate_dirty_documents};
|
||||
|
||||
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
|
||||
|
||||
/// Result of a generate-docs run.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct GenerateDocsResult {
|
||||
pub regenerated: usize,
|
||||
@@ -24,12 +21,6 @@ pub struct GenerateDocsResult {
|
||||
pub full_mode: bool,
|
||||
}
|
||||
|
||||
/// Run the generate-docs pipeline.
|
||||
///
|
||||
/// Default mode: process only existing dirty_sources entries.
|
||||
/// Full mode: seed dirty_sources with ALL entities, then drain.
|
||||
///
|
||||
/// `progress_callback` reports `(processed, estimated_total)` as documents are generated.
|
||||
pub fn run_generate_docs(
|
||||
config: &Config,
|
||||
full: bool,
|
||||
@@ -56,7 +47,6 @@ pub fn run_generate_docs(
|
||||
result.errored = regen.errored;
|
||||
|
||||
if full {
|
||||
// Optimize FTS index after bulk rebuild
|
||||
let _ = conn.execute(
|
||||
"INSERT INTO documents_fts(documents_fts) VALUES('optimize')",
|
||||
[],
|
||||
@@ -67,7 +57,6 @@ pub fn run_generate_docs(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Seed dirty_sources with all entities of the given type using keyset pagination.
|
||||
fn seed_dirty(
|
||||
conn: &Connection,
|
||||
source_type: SourceType,
|
||||
@@ -113,7 +102,6 @@ fn seed_dirty(
|
||||
break;
|
||||
}
|
||||
|
||||
// Advance keyset cursor to the max id within the chunk window
|
||||
let max_id: i64 = conn.query_row(
|
||||
&format!(
|
||||
"SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)",
|
||||
@@ -136,7 +124,6 @@ fn seed_dirty(
|
||||
Ok(total_seeded)
|
||||
}
|
||||
|
||||
/// Print human-readable output.
|
||||
pub fn print_generate_docs(result: &GenerateDocsResult) {
|
||||
let mode = if result.full_mode {
|
||||
"full"
|
||||
@@ -159,7 +146,6 @@ pub fn print_generate_docs(result: &GenerateDocsResult) {
|
||||
}
|
||||
}
|
||||
|
||||
/// JSON output structures.
|
||||
#[derive(Serialize)]
|
||||
struct GenerateDocsJsonOutput {
|
||||
ok: bool,
|
||||
@@ -176,7 +162,6 @@ struct GenerateDocsJsonData {
|
||||
errored: usize,
|
||||
}
|
||||
|
||||
/// Print JSON robot-mode output.
|
||||
pub fn print_generate_docs_json(result: &GenerateDocsResult) {
|
||||
let output = GenerateDocsJsonOutput {
|
||||
ok: true,
|
||||
|
||||
Reference in New Issue
Block a user