refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from the types)
- Implementation context (the "how" is clear from the code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept docs on type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -1,16 +1,12 @@
use crate::core::error::Result;
use rusqlite::Connection;
/// Selects how `to_fts_query` turns raw user input into an FTS5 query string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FtsQueryMode {
/// Safe mode: each whitespace-separated token is wrapped in double quotes,
/// with embedded double quotes doubled. A trailing `*` is kept outside the
/// quotes (prefix search) only when the rest of the token is non-empty and
/// consists solely of alphanumeric characters or `_`.
Safe,
/// Raw mode: the input is returned unchanged, so it reaches FTS5 exactly as
/// typed (for advanced users).
Raw,
}
/// A single FTS5 search result.
#[derive(Debug)]
pub struct FtsResult {
pub document_id: i64,
@@ -18,14 +14,6 @@ pub struct FtsResult {
pub snippet: String,
}
/// Convert raw user input into a safe FTS5 query.
///
/// Safe mode:
/// - Splits on whitespace
/// - Wraps each token in double quotes (escaping internal quotes)
/// - Preserves trailing `*` on alphanumeric-only tokens (prefix search)
///
/// Raw mode: passes through unchanged.
pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
match mode {
FtsQueryMode::Raw => raw.to_string(),
@@ -38,16 +26,13 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
let tokens: Vec<String> = trimmed
.split_whitespace()
.map(|token| {
// Check if token ends with * and the rest is alphanumeric
if let Some(stem) = token.strip_suffix('*')
&& !stem.is_empty()
&& stem.chars().all(|c| c.is_alphanumeric() || c == '_')
{
// Preserve prefix search: "stem"*
let escaped = stem.replace('"', "\"\"");
return format!("\"{}\"*", escaped);
}
// Default: wrap in quotes, escape internal quotes
let escaped = token.replace('"', "\"\"");
format!("\"{}\"", escaped)
})
@@ -58,10 +43,6 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
}
}
/// Execute an FTS5 search query.
///
/// Returns results ranked by BM25 score (lower = better match) with
/// contextual snippets highlighting matches.
pub fn search_fts(
conn: &Connection,
query: &str,
@@ -97,14 +78,11 @@ pub fn search_fts(
Ok(results)
}
/// Generate a fallback snippet for results without FTS snippets.
/// Truncates at a word boundary and appends "...".
pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String {
if content_text.chars().count() <= max_chars {
return content_text.to_string();
}
// Collect the char boundary at max_chars to slice correctly for multi-byte content
let byte_end = content_text
.char_indices()
.nth(max_chars)
@@ -112,7 +90,6 @@ pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String
.unwrap_or(content_text.len());
let truncated = &content_text[..byte_end];
// Walk backward to find a word boundary (space)
if let Some(last_space) = truncated.rfind(' ') {
format!("{}...", &truncated[..last_space])
} else {
@@ -120,7 +97,6 @@ pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String
}
}
/// Get the best snippet: prefer FTS snippet, fall back to truncated content.
pub fn get_result_snippet(fts_snippet: Option<&str>, content_text: &str) -> String {
match fts_snippet {
Some(s) if !s.is_empty() => s.to_string(),
@@ -179,11 +155,9 @@ mod tests {
#[test]
fn test_prefix_only_alphanumeric() {
// Non-alphanumeric prefix: C++* should NOT be treated as prefix search
let result = to_fts_query("C++*", FtsQueryMode::Safe);
assert_eq!(result, "\"C++*\"");
// Pure alphanumeric prefix: auth* should be prefix search
let result = to_fts_query("auth*", FtsQueryMode::Safe);
assert_eq!(result, "\"auth\"*");
}
@@ -205,7 +179,7 @@ mod tests {
let content = "This is a moderately long piece of text that should be truncated at a word boundary for readability purposes";
let result = generate_fallback_snippet(content, 50);
assert!(result.ends_with("..."));
assert!(result.len() <= 55); // 50 + "..."
assert!(result.len() <= 55);
}
#[test]