Files
gitlore/src/cli/commands/sync.rs
Taylor Eernisse 65583ed5d6 refactor: Remove redundant doc comments throughout codebase
Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the what is clear from types)
- Implementation context (the how is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 00:04:32 -05:00

372 lines
12 KiB
Rust

use console::style;
use indicatif::{ProgressBar, ProgressStyle};
use serde::Serialize;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tracing::Instrument;
use tracing::{info, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::metrics::{MetricsLayer, StageTiming};
use super::embed::run_embed;
use super::generate_docs::run_generate_docs;
use super::ingest::{IngestDisplay, run_ingest};
/// Flags that control which stages `run_sync` executes and how it reports progress.
#[derive(Debug, Default)]
pub struct SyncOptions {
    /// Forwarded to both ingest stages and to the embedding stage.
    // NOTE(review): presumably requests a full (non-incremental) run — confirm in those commands.
    pub full: bool,
    /// Forwarded to both ingest stages.
    pub force: bool,
    /// Skip the embedding stage.
    pub no_embed: bool,
    /// Skip the document generation stage.
    pub no_docs: bool,
    /// Not read by `run_sync` in this file.
    // NOTE(review): presumably consumed by the caller before `run_sync` is invoked — confirm.
    pub no_events: bool,
    /// Hide spinners and progress bars and suppress the human-readable embedding warning.
    pub robot_mode: bool,
}
/// Counters accumulated by `run_sync` across all pipeline stages.
///
/// Every counter starts at zero (via `Default`) and stays zero for stages that are skipped.
#[derive(Debug, Default, Serialize)]
pub struct SyncResult {
    /// Identifier of this sync run. Excluded from the serialized payload;
    /// `print_sync_json` emits it under `meta` instead.
    #[serde(skip)]
    pub run_id: String,
    /// `issues_upserted` reported by the issues ingest stage.
    pub issues_updated: usize,
    /// `mrs_upserted` reported by the merge request ingest stage.
    pub mrs_updated: usize,
    /// Discussions fetched, summed over the issue and merge request ingests.
    pub discussions_fetched: usize,
    /// Resource events fetched, summed over both ingests.
    pub resource_events_fetched: usize,
    /// Resource event fetch failures, summed over both ingests.
    pub resource_events_failed: usize,
    /// `regenerated` count reported by document generation.
    pub documents_regenerated: usize,
    /// `embedded` count reported by the embedding stage; stays zero if that stage fails.
    pub documents_embedded: usize,
}
/// Creates the spinner displayed while a sync stage is running.
///
/// In robot mode a hidden bar is returned. Otherwise the spinner is added to
/// `crate::cli::progress::multi()`, ticks every 80 ms, and renders a
/// `[stage/total]` prefix followed by `msg`.
fn stage_spinner(stage: u8, total: u8, msg: &str, robot_mode: bool) -> ProgressBar {
    if robot_mode {
        ProgressBar::hidden()
    } else {
        let spinner_style = ProgressStyle::default_spinner()
            .template("{spinner:.blue} {prefix} {msg}")
            .expect("valid template");
        let tick_interval = std::time::Duration::from_millis(80);

        let spinner = crate::cli::progress::multi().add(ProgressBar::new_spinner());
        spinner.set_style(spinner_style);
        spinner.enable_steady_tick(tick_interval);
        spinner.set_prefix(format!("[{stage}/{total}]"));
        spinner.set_message(msg.to_owned());
        spinner
    }
}
pub async fn run_sync(
config: &Config,
options: SyncOptions,
run_id: Option<&str>,
) -> Result<SyncResult> {
let generated_id;
let run_id = match run_id {
Some(id) => id,
None => {
generated_id = uuid::Uuid::new_v4().simple().to_string();
&generated_id[..8]
}
};
let span = tracing::info_span!("sync", %run_id);
async move {
let mut result = SyncResult {
run_id: run_id.to_string(),
..SyncResult::default()
};
let ingest_display = if options.robot_mode {
IngestDisplay::silent()
} else {
IngestDisplay::progress_only()
};
let total_stages: u8 = if options.no_docs && options.no_embed {
2
} else if options.no_docs || options.no_embed {
3
} else {
4
};
let mut current_stage: u8 = 0;
current_stage += 1;
let spinner = stage_spinner(
current_stage,
total_stages,
"Fetching issues from GitLab...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
let issues_result = run_ingest(
config,
"issues",
None,
options.force,
options.full,
ingest_display,
Some(spinner.clone()),
)
.await?;
result.issues_updated = issues_result.issues_upserted;
result.discussions_fetched += issues_result.discussions_fetched;
result.resource_events_fetched += issues_result.resource_events_fetched;
result.resource_events_failed += issues_result.resource_events_failed;
spinner.finish_and_clear();
current_stage += 1;
let spinner = stage_spinner(
current_stage,
total_stages,
"Fetching merge requests from GitLab...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
let mrs_result = run_ingest(
config,
"mrs",
None,
options.force,
options.full,
ingest_display,
Some(spinner.clone()),
)
.await?;
result.mrs_updated = mrs_result.mrs_upserted;
result.discussions_fetched += mrs_result.discussions_fetched;
result.resource_events_fetched += mrs_result.resource_events_fetched;
result.resource_events_failed += mrs_result.resource_events_failed;
spinner.finish_and_clear();
if !options.no_docs {
current_stage += 1;
let spinner = stage_spinner(
current_stage,
total_stages,
"Processing documents...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: generating documents");
let docs_bar = if options.robot_mode {
ProgressBar::hidden()
} else {
let b = crate::cli::progress::multi().add(ProgressBar::new(0));
b.set_style(
ProgressStyle::default_bar()
.template(
" {spinner:.blue} Processing documents [{bar:30.cyan/dim}] {pos}/{len}",
)
.unwrap()
.progress_chars("=> "),
);
b
};
let docs_bar_clone = docs_bar.clone();
let tick_started = Arc::new(AtomicBool::new(false));
let tick_started_clone = Arc::clone(&tick_started);
let docs_cb: Box<dyn Fn(usize, usize)> = Box::new(move |processed, total| {
if total > 0 {
if !tick_started_clone.swap(true, Ordering::Relaxed) {
docs_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
}
docs_bar_clone.set_length(total as u64);
docs_bar_clone.set_position(processed as u64);
}
});
let docs_result = run_generate_docs(config, false, None, Some(docs_cb))?;
result.documents_regenerated = docs_result.regenerated;
docs_bar.finish_and_clear();
spinner.finish_and_clear();
} else {
info!("Sync: skipping document generation (--no-docs)");
}
if !options.no_embed {
current_stage += 1;
let spinner = stage_spinner(
current_stage,
total_stages,
"Generating embeddings...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: embedding documents");
let embed_bar = if options.robot_mode {
ProgressBar::hidden()
} else {
let b = crate::cli::progress::multi().add(ProgressBar::new(0));
b.set_style(
ProgressStyle::default_bar()
.template(
" {spinner:.blue} Generating embeddings [{bar:30.cyan/dim}] {pos}/{len}",
)
.unwrap()
.progress_chars("=> "),
);
b
};
let embed_bar_clone = embed_bar.clone();
let tick_started = Arc::new(AtomicBool::new(false));
let tick_started_clone = Arc::clone(&tick_started);
let embed_cb: Box<dyn Fn(usize, usize)> = Box::new(move |processed, total| {
if total > 0 {
if !tick_started_clone.swap(true, Ordering::Relaxed) {
embed_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
}
embed_bar_clone.set_length(total as u64);
embed_bar_clone.set_position(processed as u64);
}
});
match run_embed(config, options.full, false, Some(embed_cb)).await {
Ok(embed_result) => {
result.documents_embedded = embed_result.embedded;
embed_bar.finish_and_clear();
spinner.finish_and_clear();
}
Err(e) => {
embed_bar.finish_and_clear();
spinner.finish_and_clear();
if !options.robot_mode {
eprintln!(" {} Embedding skipped ({})", style("warn").yellow(), e);
}
warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
}
}
} else {
info!("Sync: skipping embedding (--no-embed)");
}
info!(
issues = result.issues_updated,
mrs = result.mrs_updated,
discussions = result.discussions_fetched,
resource_events = result.resource_events_fetched,
resource_events_failed = result.resource_events_failed,
docs = result.documents_regenerated,
embedded = result.documents_embedded,
"Sync pipeline complete"
);
Ok(result)
}
.instrument(span)
.await
}
pub fn print_sync(
result: &SyncResult,
elapsed: std::time::Duration,
metrics: Option<&MetricsLayer>,
) {
println!("{} Sync complete:", style("done").green().bold(),);
println!(" Issues updated: {}", result.issues_updated);
println!(" MRs updated: {}", result.mrs_updated);
println!(
" Discussions fetched: {}",
result.discussions_fetched
);
if result.resource_events_fetched > 0 || result.resource_events_failed > 0 {
println!(
" Resource events fetched: {}",
result.resource_events_fetched
);
if result.resource_events_failed > 0 {
println!(
" Resource events failed: {}",
result.resource_events_failed
);
}
}
println!(
" Documents regenerated: {}",
result.documents_regenerated
);
println!(" Documents embedded: {}", result.documents_embedded);
println!(" Elapsed: {:.1}s", elapsed.as_secs_f64());
if let Some(metrics) = metrics {
let stages = metrics.extract_timings();
if !stages.is_empty() {
print_timing_summary(&stages);
}
}
}
/// Prints the "Stage timing:" heading followed by one line per sub-stage.
///
/// The top-level entries themselves are not printed; only their sub-stages
/// are, starting at depth 1.
fn print_timing_summary(stages: &[StageTiming]) {
    println!();
    println!("{}", style("Stage timing:").dim());
    stages
        .iter()
        .flat_map(|top_level| &top_level.sub_stages)
        .for_each(|sub| print_stage_line(sub, 1));
}
/// Prints the timing line for `stage`, then recurses into its sub-stages one
/// level deeper.
///
/// A line consists of the indent, the stage name (with the project in
/// parentheses when present), dot leaders that pad the label to 30 bytes
/// (never fewer than two dots), and the elapsed time in seconds with one
/// decimal. Non-zero item, error and rate-limit counts are appended in
/// parentheses, separated by commas.
fn print_stage_line(stage: &StageTiming, depth: usize) {
    let indent = " ".repeat(depth);
    let name = match &stage.project {
        Some(project) => format!("{} ({})", stage.name, project),
        None => stage.name.clone(),
    };

    let label_len = indent.len() + name.len();
    let dots = ".".repeat(30_usize.saturating_sub(label_len).max(2));
    let time_str = format!("{:.1}s", stage.elapsed_ms as f64 / 1000.0);

    // Collect only the counters worth mentioning, in a fixed order.
    let mut details: Vec<String> = Vec::with_capacity(3);
    if stage.items_processed > 0 {
        details.push(format!("{} items", stage.items_processed));
    }
    if stage.errors > 0 {
        details.push(format!("{} errors", stage.errors));
    }
    if stage.rate_limit_hits > 0 {
        details.push(format!("{} rate limits", stage.rate_limit_hits));
    }
    let suffix = details.join(", ");

    if suffix.is_empty() {
        println!("{indent}{name} {dots} {time_str}");
    } else {
        println!("{indent}{name} {dots} {time_str} ({suffix})");
    }

    for child in &stage.sub_stages {
        print_stage_line(child, depth + 1);
    }
}
/// JSON envelope written by `print_sync_json`.
#[derive(Serialize)]
struct SyncJsonOutput<'a> {
    /// Success flag; `print_sync_json` always sets it to `true`.
    ok: bool,
    /// The sync counters, borrowed from the caller's result.
    data: &'a SyncResult,
    /// Run metadata emitted alongside the counters.
    meta: SyncMeta,
}
/// Metadata section of the JSON sync output.
#[derive(Serialize)]
struct SyncMeta {
    /// Identifier of the sync run, copied from `SyncResult::run_id`.
    run_id: String,
    /// Elapsed time in milliseconds, as supplied by the caller.
    elapsed_ms: u64,
    /// Per-stage timings; the key is omitted from the output when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    stages: Vec<StageTiming>,
}
/// Prints the sync result to stdout as a single-line JSON envelope.
///
/// The envelope carries `ok: true`, the result counters under `data`, and the
/// run id, `elapsed_ms` and any stage timings extracted from `metrics` under
/// `meta`. Without a metrics layer the timing list is empty.
///
/// # Panics
///
/// Panics if serialization of the envelope fails.
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&MetricsLayer>) {
    let stages = match metrics {
        Some(layer) => layer.extract_timings(),
        None => Vec::new(),
    };
    let meta = SyncMeta {
        run_id: result.run_id.clone(),
        elapsed_ms,
        stages,
    };
    let envelope = SyncJsonOutput {
        ok: true,
        data: result,
        meta,
    };
    let json = serde_json::to_string(&envelope).unwrap();
    println!("{}", json);
}