refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -1,5 +1,3 @@
//! Ingest command - fetch data from GitLab.
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
@@ -22,17 +20,14 @@ use crate::ingestion::{
ingest_project_merge_requests_with_progress,
};
/// Result of ingest command for display.
#[derive(Default)]
pub struct IngestResult {
pub resource_type: String,
pub projects_synced: usize,
// Issue-specific fields
pub issues_fetched: usize,
pub issues_upserted: usize,
pub issues_synced_discussions: usize,
pub issues_skipped_discussion_sync: usize,
// MR-specific fields
pub mrs_fetched: usize,
pub mrs_upserted: usize,
pub mrs_synced_discussions: usize,
@@ -40,17 +35,13 @@ pub struct IngestResult {
pub assignees_linked: usize,
pub reviewers_linked: usize,
pub diffnotes_count: usize,
// Shared fields
pub labels_created: usize,
pub discussions_fetched: usize,
pub notes_upserted: usize,
// Resource events
pub resource_events_fetched: usize,
pub resource_events_failed: usize,
}
/// Outcome of ingesting a single project, used to aggregate results
/// from concurrent project processing.
enum ProjectIngestOutcome {
Issues {
path: String,
@@ -62,24 +53,14 @@ enum ProjectIngestOutcome {
},
}
/// Controls what interactive UI elements `run_ingest` displays.
///
/// Separates progress indicators (spinners, bars) from text output (headers,
/// per-project summaries) so callers like `sync` can show progress without
/// duplicating summary text.
#[derive(Debug, Clone, Copy)]
pub struct IngestDisplay {
/// Show animated spinners and progress bars.
pub show_progress: bool,
/// Show the per-project spinner. When called from `sync`, the stage
/// spinner already covers this, so a second spinner causes flashing.
pub show_spinner: bool,
/// Show text headers ("Ingesting...") and per-project summary lines.
pub show_text: bool,
}
impl IngestDisplay {
/// Interactive mode: everything visible.
pub fn interactive() -> Self {
Self {
show_progress: true,
@@ -88,7 +69,6 @@ impl IngestDisplay {
}
}
/// Robot/JSON mode: everything hidden.
pub fn silent() -> Self {
Self {
show_progress: false,
@@ -97,8 +77,6 @@ impl IngestDisplay {
}
}
/// Progress bars only, no spinner or text (used by sync which provides its
/// own stage spinner).
pub fn progress_only() -> Self {
Self {
show_progress: true,
@@ -108,10 +86,6 @@ impl IngestDisplay {
}
}
/// Run the ingest command.
///
/// `stage_bar` is an optional `ProgressBar` (typically from sync's stage spinner)
/// that will be updated with aggregate progress across all projects.
pub async fn run_ingest(
config: &Config,
resource_type: &str,
@@ -138,7 +112,6 @@ pub async fn run_ingest(
.await
}
/// Inner implementation of run_ingest, instrumented with a root span.
async fn run_ingest_inner(
config: &Config,
resource_type: &str,
@@ -148,7 +121,6 @@ async fn run_ingest_inner(
display: IngestDisplay,
stage_bar: Option<ProgressBar>,
) -> Result<IngestResult> {
// Validate resource type early
if resource_type != "issues" && resource_type != "mrs" {
return Err(LoreError::Other(format!(
"Invalid resource type '{}'. Valid types: issues, mrs",
@@ -156,11 +128,9 @@ async fn run_ingest_inner(
)));
}
// Get database path and create connection
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
// Acquire single-flight lock
let lock_conn = create_connection(&db_path)?;
let mut lock = AppLock::new(
lock_conn,
@@ -172,23 +142,19 @@ async fn run_ingest_inner(
);
lock.acquire(force)?;
// Get token from environment
let token =
std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
env_var: config.gitlab.token_env_var.clone(),
})?;
// Create GitLab client
let client = GitLabClient::new(
&config.gitlab.base_url,
&token,
Some(config.sync.requests_per_second),
);
// Get projects to sync
let projects = get_projects_to_sync(&conn, &config.projects, project_filter)?;
// If --full flag is set, reset sync cursors and discussion watermarks for a complete re-fetch
if full {
if display.show_text {
println!(
@@ -198,20 +164,17 @@ async fn run_ingest_inner(
}
for (local_project_id, _, path) in &projects {
if resource_type == "issues" {
// Reset issue discussion and resource event watermarks so everything gets re-synced
conn.execute(
"UPDATE issues SET discussions_synced_for_updated_at = NULL, resource_events_synced_for_updated_at = NULL WHERE project_id = ?",
[*local_project_id],
)?;
} else if resource_type == "mrs" {
// Reset MR discussion and resource event watermarks
conn.execute(
"UPDATE merge_requests SET discussions_synced_for_updated_at = NULL, resource_events_synced_for_updated_at = NULL WHERE project_id = ?",
[*local_project_id],
)?;
}
// Then reset sync cursor
conn.execute(
"DELETE FROM sync_cursors WHERE project_id = ? AND resource_type = ?",
(*local_project_id, resource_type),
@@ -248,12 +211,9 @@ async fn run_ingest_inner(
println!();
}
// Process projects concurrently. Each project gets its own DB connection
// while sharing the rate limiter through the cloned GitLabClient.
let concurrency = config.sync.primary_concurrency as usize;
let resource_type_owned = resource_type.to_string();
// Aggregate counters for stage_bar updates (shared across concurrent projects)
let agg_fetched = Arc::new(AtomicUsize::new(0));
let agg_discussions = Arc::new(AtomicUsize::new(0));
let agg_disc_total = Arc::new(AtomicUsize::new(0));
@@ -328,7 +288,6 @@ async fn run_ingest_inner(
} else {
Box::new(move |event: ProgressEvent| match event {
ProgressEvent::IssuesFetchStarted | ProgressEvent::MrsFetchStarted => {
// Spinner already showing fetch message
}
ProgressEvent::IssuesFetchComplete { total } | ProgressEvent::MrsFetchComplete { total } => {
let agg = agg_fetched_clone.fetch_add(total, Ordering::Relaxed) + total;
@@ -410,6 +369,20 @@ async fn run_ingest_inner(
ProgressEvent::ResourceEventsFetchComplete { .. } => {
disc_bar_clone.finish_and_clear();
}
ProgressEvent::ClosesIssuesFetchStarted { total } => {
disc_bar_clone.reset();
disc_bar_clone.set_length(total as u64);
disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
stage_bar_clone.set_message(
"Fetching closes-issues references...".to_string()
);
}
ProgressEvent::ClosesIssueFetched { current, total: _ } => {
disc_bar_clone.set_position(current as u64);
}
ProgressEvent::ClosesIssuesFetchComplete { .. } => {
disc_bar_clone.finish_and_clear();
}
})
};
@@ -453,9 +426,6 @@ async fn run_ingest_inner(
.collect()
.await;
// Aggregate results and print per-project summaries.
// Process all successes first, then return the first error (if any)
// so that successful project summaries are always printed.
let mut first_error: Option<LoreError> = None;
for project_result in project_results {
match project_result {
@@ -510,21 +480,17 @@ async fn run_ingest_inner(
return Err(e);
}
// Lock is released on drop
Ok(total)
}
/// Get projects to sync from database, optionally filtered.
fn get_projects_to_sync(
conn: &Connection,
configured_projects: &[crate::core::config::ProjectConfig],
filter: Option<&str>,
) -> Result<Vec<(i64, i64, String)>> {
// If a filter is provided, resolve it to a specific project
if let Some(filter_str) = filter {
let project_id = resolve_project(conn, filter_str)?;
// Verify the resolved project is in our config
let row: Option<(i64, String)> = conn
.query_row(
"SELECT gitlab_project_id, path_with_namespace FROM projects WHERE id = ?1",
@@ -534,7 +500,6 @@ fn get_projects_to_sync(
.ok();
if let Some((gitlab_id, path)) = row {
// Confirm it's a configured project
if configured_projects.iter().any(|p| p.path == path) {
return Ok(vec![(project_id, gitlab_id, path)]);
}
@@ -550,7 +515,6 @@ fn get_projects_to_sync(
)));
}
// No filter: return all configured projects
let mut projects = Vec::new();
for project_config in configured_projects {
let result: Option<(i64, i64)> = conn
@@ -569,7 +533,6 @@ fn get_projects_to_sync(
Ok(projects)
}
/// Print summary for a single project (issues).
fn print_issue_project_summary(path: &str, result: &IngestProjectResult) {
let labels_str = if result.labels_created > 0 {
format!(", {} new labels", result.labels_created)
@@ -599,7 +562,6 @@ fn print_issue_project_summary(path: &str, result: &IngestProjectResult) {
}
}
/// Print summary for a single project (merge requests).
fn print_mr_project_summary(path: &str, result: &IngestMrProjectResult) {
let labels_str = if result.labels_created > 0 {
format!(", {} new labels", result.labels_created)
@@ -647,7 +609,6 @@ fn print_mr_project_summary(path: &str, result: &IngestMrProjectResult) {
}
}
/// JSON output structures for robot mode.
#[derive(Serialize)]
struct IngestJsonOutput {
ok: bool,
@@ -688,7 +649,6 @@ struct IngestMrStats {
diffnotes_count: usize,
}
/// Print final summary as JSON (robot mode).
pub fn print_ingest_summary_json(result: &IngestResult) {
let (issues, merge_requests) = if result.resource_type == "issues" {
(
@@ -733,7 +693,6 @@ pub fn print_ingest_summary_json(result: &IngestResult) {
println!("{}", serde_json::to_string(&output).unwrap());
}
/// Print final summary.
pub fn print_ingest_summary(result: &IngestResult) {
println!();