refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -1,29 +1,13 @@
//! Logging infrastructure: dual-layer subscriber setup and log file retention.
//!
//! Provides a layered tracing subscriber with:
//! - **stderr layer**: Human-readable or JSON format, controlled by `-v` flags
//! - **file layer**: Always-on JSON output to daily-rotated log files
use std::fs;
use std::path::Path;
use tracing_subscriber::EnvFilter;
/// Build an `EnvFilter` from the verbosity count.
///
/// | Count | App Level | Dep Level |
/// |-------|-----------|-----------|
/// | 0 | INFO | WARN |
/// | 1 | DEBUG | WARN |
/// | 2 | DEBUG | INFO |
/// | 3+ | TRACE | DEBUG |
pub fn build_stderr_filter(verbose: u8, quiet: bool) -> EnvFilter {
// RUST_LOG always wins if set
if std::env::var("RUST_LOG").is_ok() {
return EnvFilter::from_default_env();
}
// -q overrides -v for stderr
if quiet {
return EnvFilter::new("lore=warn,error");
}
@@ -38,10 +22,6 @@ pub fn build_stderr_filter(verbose: u8, quiet: bool) -> EnvFilter {
EnvFilter::new(directives)
}
/// Build an `EnvFilter` for the file layer.
///
/// Always captures DEBUG+ for `lore::*` and WARN+ for dependencies,
/// unless `RUST_LOG` is set (which overrides everything).
pub fn build_file_filter() -> EnvFilter {
if std::env::var("RUST_LOG").is_ok() {
return EnvFilter::from_default_env();
@@ -50,10 +30,6 @@ pub fn build_file_filter() -> EnvFilter {
EnvFilter::new("lore=debug,warn")
}
/// Delete log files older than `retention_days` from the given directory.
///
/// Only deletes files matching the `lore.YYYY-MM-DD.log` pattern.
/// Returns the number of files deleted.
pub fn cleanup_old_logs(log_dir: &Path, retention_days: u32) -> usize {
if retention_days == 0 || !log_dir.exists() {
return 0;
@@ -72,7 +48,6 @@ pub fn cleanup_old_logs(log_dir: &Path, retention_days: u32) -> usize {
let file_name = entry.file_name();
let name = file_name.to_string_lossy();
// Match pattern: lore.YYYY-MM-DD.log or lore.YYYY-MM-DD (tracing-appender format)
if let Some(date_str) = extract_log_date(&name)
&& date_str < cutoff_date
&& fs::remove_file(entry.path()).is_ok()
@@ -84,28 +59,20 @@ pub fn cleanup_old_logs(log_dir: &Path, retention_days: u32) -> usize {
deleted
}
/// Extract the date portion from a log filename.
///
/// Matches: `lore.YYYY-MM-DD.log` or `lore.YYYY-MM-DD`
fn extract_log_date(filename: &str) -> Option<String> {
let rest = filename.strip_prefix("lore.")?;
// Must have at least YYYY-MM-DD (10 ASCII chars).
// Use get() to avoid panicking on non-ASCII filenames.
let date_part = rest.get(..10)?;
// Validate it looks like a date
let parts: Vec<&str> = date_part.split('-').collect();
if parts.len() != 3 || parts[0].len() != 4 || parts[1].len() != 2 || parts[2].len() != 2 {
return None;
}
// Check all parts are numeric (also ensures ASCII)
if !parts.iter().all(|p| p.chars().all(|c| c.is_ascii_digit())) {
return None;
}
// After the date, must be end-of-string or ".log"
let suffix = rest.get(10..)?;
if suffix.is_empty() || suffix == ".log" {
Some(date_part.to_string())
@@ -153,16 +120,13 @@ mod tests {
fn test_cleanup_old_logs_deletes_old_files() {
let dir = TempDir::new().unwrap();
// Create old log files (well before any reasonable retention)
File::create(dir.path().join("lore.2020-01-01.log")).unwrap();
File::create(dir.path().join("lore.2020-01-15.log")).unwrap();
// Create a recent log file (today)
let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
let recent_name = format!("lore.{today}.log");
File::create(dir.path().join(&recent_name)).unwrap();
// Create a non-log file that should NOT be deleted
File::create(dir.path().join("other.txt")).unwrap();
let deleted = cleanup_old_logs(dir.path(), 7);
@@ -192,7 +156,6 @@ mod tests {
#[test]
fn test_build_stderr_filter_default() {
// Can't easily assert filter contents, but verify it doesn't panic
let _filter = build_stderr_filter(0, false);
}
@@ -206,7 +169,6 @@ mod tests {
#[test]
fn test_build_stderr_filter_quiet_overrides_verbose() {
// Quiet should win over verbose
let _filter = build_stderr_filter(3, true);
}