refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:

- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:

- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why"
(business decisions, non-obvious constraints) not "what" (which the code
itself shows). This change reduces noise and maintenance burden while keeping
the codebase just as understandable.

Retains comments for:

- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions
--- a/src/search/rrf.rs
+++ b/src/search/rrf.rs
@@ -2,39 +2,24 @@ use std::collections::HashMap;

 const RRF_K: f64 = 60.0;

-/// A single result from Reciprocal Rank Fusion, containing both raw and
-/// normalized scores plus per-list rank provenance for --explain output.
 pub struct RrfResult {
    pub document_id: i64,
-    /// Raw RRF score: sum of 1/(k + rank) across all lists.
    pub rrf_score: f64,
-    /// Normalized to [0, 1] where the best result is 1.0.
    pub normalized_score: f64,
-    /// 1-indexed rank in the vector results list, if present.
    pub vector_rank: Option<usize>,
-    /// 1-indexed rank in the FTS results list, if present.
    pub fts_rank: Option<usize>,
 }

-/// Combine vector and FTS retrieval results using Reciprocal Rank Fusion.
-///
-/// Input tuples are `(document_id, score/distance)` — already sorted by each retriever.
-/// Ranks are 1-indexed (first result = rank 1).
-///
-/// Score = sum of 1/(k + rank) for each list containing the document.
 pub fn rank_rrf(vector_results: &[(i64, f64)], fts_results: &[(i64, f64)]) -> Vec<RrfResult> {
    if vector_results.is_empty() && fts_results.is_empty() {
        return Vec::new();
    }

-    // (rrf_score, vector_rank, fts_rank)
    let mut scores: HashMap<i64, (f64, Option<usize>, Option<usize>)> = HashMap::new();

    for (i, &(doc_id, _)) in vector_results.iter().enumerate() {
-        let rank = i + 1; // 1-indexed
+        let rank = i + 1;
        let entry = scores.entry(doc_id).or_insert((0.0, None, None));
-        // Only count the first occurrence per list to prevent duplicates
-        // from inflating the score.
        if entry.1.is_none() {
            entry.0 += 1.0 / (RRF_K + rank as f64);
            entry.1 = Some(rank);
@@ -42,7 +27,7 @@ pub fn rank_rrf(vector_results: &[(i64, f64)], fts_results: &[(i64, f64)]) -> Ve
    }

    for (i, &(doc_id, _)) in fts_results.iter().enumerate() {
-        let rank = i + 1; // 1-indexed
+        let rank = i + 1;
        let entry = scores.entry(doc_id).or_insert((0.0, None, None));
        if entry.2.is_none() {
            entry.0 += 1.0 / (RRF_K + rank as f64);
@@ -55,16 +40,14 @@ pub fn rank_rrf(vector_results: &[(i64, f64)], fts_results: &[(i64, f64)]) -> Ve
        .map(|(doc_id, (rrf_score, vector_rank, fts_rank))| RrfResult {
            document_id: doc_id,
            rrf_score,
-            normalized_score: 0.0, // filled in below
+            normalized_score: 0.0,
            vector_rank,
            fts_rank,
        })
        .collect();

-    // Sort descending by rrf_score
    results.sort_by(|a, b| b.rrf_score.total_cmp(&a.rrf_score));

-    // Normalize: best = 1.0
    if let Some(max_score) = results.first().map(|r| r.rrf_score).filter(|&s| s > 0.0) {
        for result in &mut results {
            result.normalized_score = result.rrf_score / max_score;
@@ -84,10 +67,8 @@ mod tests {
        let fts = vec![(1, 5.0), (3, 3.0)];
        let results = rank_rrf(&vector, &fts);

-        // Doc 1 appears in both lists, should rank highest
        assert_eq!(results[0].document_id, 1);

-        // Doc 1 score should be higher than doc 2 and doc 3
        let doc1 = &results[0];
        let doc2_score = results
            .iter()
@@ -121,10 +102,8 @@ mod tests {
        let fts = vec![(1, 5.0), (3, 3.0)];
        let results = rank_rrf(&vector, &fts);

-        // Best result should have normalized_score = 1.0
        assert!((results[0].normalized_score - 1.0).abs() < f64::EPSILON);

-        // All scores in [0, 1]
        for r in &results {
            assert!(r.normalized_score >= 0.0);
            assert!(r.normalized_score <= 1.0);
@@ -165,7 +144,6 @@ mod tests {
        assert_eq!(results.len(), 1);
        let r = &results[0];

-        // RRF score = 1/(60+1) + 1/(60+1) = 2/61
        let expected = 2.0 / 61.0;
        assert!((r.rrf_score - expected).abs() < 1e-10);
        assert!((r.normalized_score - 1.0).abs() < f64::EPSILON);
@@ -177,7 +155,6 @@ mod tests {
        let results = rank_rrf(&vector, &[]);

        assert_eq!(results.len(), 2);
-        // Single result should still have normalized_score = 1.0
        assert!((results[0].normalized_score - 1.0).abs() < f64::EPSILON);
    }
 }