refactor: Remove redundant doc comments throughout codebase
Removes module-level doc comments (`//!` lines) and excessive inline doc comments that were duplicating information already evident from:

- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:

- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints), not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable.

Retains comments for:

- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,12 @@
|
||||
use crate::core::error::Result;
|
||||
use rusqlite::Connection;
|
||||
|
||||
/// FTS query mode.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum FtsQueryMode {
|
||||
/// Safe mode: each token wrapped in quotes, trailing * preserved on alphanumeric tokens.
|
||||
Safe,
|
||||
/// Raw mode: query passed directly to FTS5 (for advanced users).
|
||||
Raw,
|
||||
}
|
||||
|
||||
/// A single FTS5 search result.
|
||||
#[derive(Debug)]
|
||||
pub struct FtsResult {
|
||||
pub document_id: i64,
|
||||
@@ -18,14 +14,6 @@ pub struct FtsResult {
|
||||
pub snippet: String,
|
||||
}
|
||||
|
||||
/// Convert raw user input into a safe FTS5 query.
|
||||
///
|
||||
/// Safe mode:
|
||||
/// - Splits on whitespace
|
||||
/// - Wraps each token in double quotes (escaping internal quotes)
|
||||
/// - Preserves trailing `*` on alphanumeric-only tokens (prefix search)
|
||||
///
|
||||
/// Raw mode: passes through unchanged.
|
||||
pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
|
||||
match mode {
|
||||
FtsQueryMode::Raw => raw.to_string(),
|
||||
@@ -38,16 +26,13 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
|
||||
let tokens: Vec<String> = trimmed
|
||||
.split_whitespace()
|
||||
.map(|token| {
|
||||
// Check if token ends with * and the rest is alphanumeric
|
||||
if let Some(stem) = token.strip_suffix('*')
|
||||
&& !stem.is_empty()
|
||||
&& stem.chars().all(|c| c.is_alphanumeric() || c == '_')
|
||||
{
|
||||
// Preserve prefix search: "stem"*
|
||||
let escaped = stem.replace('"', "\"\"");
|
||||
return format!("\"{}\"*", escaped);
|
||||
}
|
||||
// Default: wrap in quotes, escape internal quotes
|
||||
let escaped = token.replace('"', "\"\"");
|
||||
format!("\"{}\"", escaped)
|
||||
})
|
||||
@@ -58,10 +43,6 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute an FTS5 search query.
|
||||
///
|
||||
/// Returns results ranked by BM25 score (lower = better match) with
|
||||
/// contextual snippets highlighting matches.
|
||||
pub fn search_fts(
|
||||
conn: &Connection,
|
||||
query: &str,
|
||||
@@ -97,14 +78,11 @@ pub fn search_fts(
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Generate a fallback snippet for results without FTS snippets.
|
||||
/// Truncates at a word boundary and appends "...".
|
||||
pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String {
|
||||
if content_text.chars().count() <= max_chars {
|
||||
return content_text.to_string();
|
||||
}
|
||||
|
||||
// Collect the char boundary at max_chars to slice correctly for multi-byte content
|
||||
let byte_end = content_text
|
||||
.char_indices()
|
||||
.nth(max_chars)
|
||||
@@ -112,7 +90,6 @@ pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String
|
||||
.unwrap_or(content_text.len());
|
||||
let truncated = &content_text[..byte_end];
|
||||
|
||||
// Walk backward to find a word boundary (space)
|
||||
if let Some(last_space) = truncated.rfind(' ') {
|
||||
format!("{}...", &truncated[..last_space])
|
||||
} else {
|
||||
@@ -120,7 +97,6 @@ pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the best snippet: prefer FTS snippet, fall back to truncated content.
|
||||
pub fn get_result_snippet(fts_snippet: Option<&str>, content_text: &str) -> String {
|
||||
match fts_snippet {
|
||||
Some(s) if !s.is_empty() => s.to_string(),
|
||||
@@ -179,11 +155,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_prefix_only_alphanumeric() {
|
||||
// Non-alphanumeric prefix: C++* should NOT be treated as prefix search
|
||||
let result = to_fts_query("C++*", FtsQueryMode::Safe);
|
||||
assert_eq!(result, "\"C++*\"");
|
||||
|
||||
// Pure alphanumeric prefix: auth* should be prefix search
|
||||
let result = to_fts_query("auth*", FtsQueryMode::Safe);
|
||||
assert_eq!(result, "\"auth\"*");
|
||||
}
|
||||
@@ -205,7 +179,7 @@ mod tests {
|
||||
let content = "This is a moderately long piece of text that should be truncated at a word boundary for readability purposes";
|
||||
let result = generate_fallback_snippet(content, 50);
|
||||
assert!(result.ends_with("..."));
|
||||
assert!(result.len() <= 55); // 50 + "..."
|
||||
assert!(result.len() <= 55);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user