refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -7,8 +7,6 @@ use crate::documents::SourceType;
const DIRTY_SOURCES_BATCH_SIZE: usize = 500;
/// Mark a source entity as dirty INSIDE an existing transaction.
/// ON CONFLICT resets ALL backoff/error state so fresh updates are immediately eligible.
pub fn mark_dirty_tx(
tx: &rusqlite::Transaction<'_>,
source_type: SourceType,
@@ -28,7 +26,6 @@ pub fn mark_dirty_tx(
Ok(())
}
/// Convenience wrapper for non-transactional contexts.
pub fn mark_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
@@ -44,9 +41,6 @@ pub fn mark_dirty(conn: &Connection, source_type: SourceType, source_id: i64) ->
Ok(())
}
/// Get dirty sources ready for processing.
/// Returns entries where next_attempt_at is NULL or <= now.
/// Orders by attempt_count ASC (fresh before failed), then queued_at ASC.
pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
let now = now_ms();
let mut stmt = conn.prepare(
@@ -79,7 +73,6 @@ pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
Ok(results)
}
/// Clear dirty entry after successful processing.
pub fn clear_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
conn.execute(
"DELETE FROM dirty_sources WHERE source_type = ?1 AND source_id = ?2",
@@ -88,7 +81,6 @@ pub fn clear_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -
Ok(())
}
/// Record an error for a dirty source, incrementing attempt_count and setting backoff.
pub fn record_dirty_error(
conn: &Connection,
source_type: SourceType,
@@ -96,7 +88,6 @@ pub fn record_dirty_error(
error: &str,
) -> Result<()> {
let now = now_ms();
// Get current attempt_count first
let attempt_count: i64 = conn.query_row(
"SELECT attempt_count FROM dirty_sources WHERE source_type = ?1 AND source_id = ?2",
rusqlite::params![source_type.as_str(), source_id],
@@ -176,7 +167,6 @@ mod tests {
fn test_requeue_resets_backoff() {
let conn = setup_db();
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
// Simulate error state
record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap();
let attempt: i64 = conn
@@ -188,7 +178,6 @@ mod tests {
.unwrap();
assert_eq!(attempt, 1);
// Re-mark should reset
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let attempt: i64 = conn
.query_row(
@@ -213,7 +202,6 @@ mod tests {
fn test_get_respects_backoff() {
let conn = setup_db();
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
// Set next_attempt_at far in the future
conn.execute(
"UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1",
[],
@@ -227,20 +215,18 @@ mod tests {
#[test]
fn test_get_orders_by_attempt_count() {
let conn = setup_db();
// Insert issue 1 (failed, attempt_count=2)
mark_dirty(&conn, SourceType::Issue, 1).unwrap();
conn.execute(
"UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1",
[],
)
.unwrap();
// Insert issue 2 (fresh, attempt_count=0)
mark_dirty(&conn, SourceType::Issue, 2).unwrap();
let results = get_dirty_sources(&conn).unwrap();
assert_eq!(results.len(), 2);
assert_eq!(results[0].1, 2); // Fresh first
assert_eq!(results[1].1, 1); // Failed second
assert_eq!(results[0].1, 2);
assert_eq!(results[1].1, 1);
}
#[test]