//! Ingest command - fetch data from GitLab. use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; use console::style; use indicatif::{ProgressBar, ProgressStyle}; use rusqlite::Connection; use serde::Serialize; use tracing::Instrument; use crate::Config; use crate::core::db::create_connection; use crate::core::error::{LoreError, Result}; use crate::core::lock::{AppLock, LockOptions}; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::gitlab::GitLabClient; use crate::ingestion::{ IngestMrProjectResult, IngestProjectResult, ProgressEvent, ingest_project_issues_with_progress, ingest_project_merge_requests_with_progress, }; /// Result of ingest command for display. #[derive(Default)] pub struct IngestResult { pub resource_type: String, pub projects_synced: usize, // Issue-specific fields pub issues_fetched: usize, pub issues_upserted: usize, pub issues_synced_discussions: usize, pub issues_skipped_discussion_sync: usize, // MR-specific fields pub mrs_fetched: usize, pub mrs_upserted: usize, pub mrs_synced_discussions: usize, pub mrs_skipped_discussion_sync: usize, pub assignees_linked: usize, pub reviewers_linked: usize, pub diffnotes_count: usize, // Shared fields pub labels_created: usize, pub discussions_fetched: usize, pub notes_upserted: usize, // Resource events pub resource_events_fetched: usize, pub resource_events_failed: usize, } /// Outcome of ingesting a single project, used to aggregate results /// from concurrent project processing. enum ProjectIngestOutcome { Issues { path: String, result: IngestProjectResult, }, Mrs { path: String, result: IngestMrProjectResult, }, } /// Controls what interactive UI elements `run_ingest` displays. /// /// Separates progress indicators (spinners, bars) from text output (headers, /// per-project summaries) so callers like `sync` can show progress without /// duplicating summary text. 
#[derive(Debug, Clone, Copy)] pub struct IngestDisplay { /// Show animated spinners and progress bars. pub show_progress: bool, /// Show the per-project spinner. When called from `sync`, the stage /// spinner already covers this, so a second spinner causes flashing. pub show_spinner: bool, /// Show text headers ("Ingesting...") and per-project summary lines. pub show_text: bool, } impl IngestDisplay { /// Interactive mode: everything visible. pub fn interactive() -> Self { Self { show_progress: true, show_spinner: true, show_text: true, } } /// Robot/JSON mode: everything hidden. pub fn silent() -> Self { Self { show_progress: false, show_spinner: false, show_text: false, } } /// Progress bars only, no spinner or text (used by sync which provides its /// own stage spinner). pub fn progress_only() -> Self { Self { show_progress: true, show_spinner: false, show_text: false, } } } /// Run the ingest command. /// /// `stage_bar` is an optional `ProgressBar` (typically from sync's stage spinner) /// that will be updated with aggregate progress across all projects. pub async fn run_ingest( config: &Config, resource_type: &str, project_filter: Option<&str>, force: bool, full: bool, display: IngestDisplay, stage_bar: Option, ) -> Result { let run_id = uuid::Uuid::new_v4().simple().to_string(); let run_id = &run_id[..8]; let span = tracing::info_span!("ingest", %run_id, %resource_type); run_ingest_inner( config, resource_type, project_filter, force, full, display, stage_bar, ) .instrument(span) .await } /// Inner implementation of run_ingest, instrumented with a root span. async fn run_ingest_inner( config: &Config, resource_type: &str, project_filter: Option<&str>, force: bool, full: bool, display: IngestDisplay, stage_bar: Option, ) -> Result { // Validate resource type early if resource_type != "issues" && resource_type != "mrs" { return Err(LoreError::Other(format!( "Invalid resource type '{}'. 
Valid types: issues, mrs", resource_type ))); } // Get database path and create connection let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; // Acquire single-flight lock let lock_conn = create_connection(&db_path)?; let mut lock = AppLock::new( lock_conn, LockOptions { name: "sync".to_string(), stale_lock_minutes: config.sync.stale_lock_minutes, heartbeat_interval_seconds: config.sync.heartbeat_interval_seconds, }, ); lock.acquire(force)?; // Get token from environment let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { env_var: config.gitlab.token_env_var.clone(), })?; // Create GitLab client let client = GitLabClient::new( &config.gitlab.base_url, &token, Some(config.sync.requests_per_second), ); // Get projects to sync let projects = get_projects_to_sync(&conn, &config.projects, project_filter)?; // If --full flag is set, reset sync cursors and discussion watermarks for a complete re-fetch if full { if display.show_text { println!( "{}", style("Full sync: resetting cursors to fetch all data...").yellow() ); } for (local_project_id, _, path) in &projects { if resource_type == "issues" { // Reset issue discussion and resource event watermarks so everything gets re-synced conn.execute( "UPDATE issues SET discussions_synced_for_updated_at = NULL, resource_events_synced_for_updated_at = NULL WHERE project_id = ?", [*local_project_id], )?; } else if resource_type == "mrs" { // Reset MR discussion and resource event watermarks conn.execute( "UPDATE merge_requests SET discussions_synced_for_updated_at = NULL, resource_events_synced_for_updated_at = NULL WHERE project_id = ?", [*local_project_id], )?; } // Then reset sync cursor conn.execute( "DELETE FROM sync_cursors WHERE project_id = ? 
AND resource_type = ?", (*local_project_id, resource_type), )?; tracing::info!(project = %path, resource_type, "Reset sync cursor and discussion watermarks for full re-fetch"); } } if projects.is_empty() { if let Some(filter) = project_filter { return Err(LoreError::Other(format!( "Project '{}' not found in configuration", filter ))); } return Err(LoreError::Other( "No projects configured. Run 'lore init' first.".to_string(), )); } let mut total = IngestResult { resource_type: resource_type.to_string(), ..Default::default() }; let type_label = if resource_type == "issues" { "issues" } else { "merge requests" }; if display.show_text { println!("{}", style(format!("Ingesting {type_label}...")).blue()); println!(); } // Process projects concurrently. Each project gets its own DB connection // while sharing the rate limiter through the cloned GitLabClient. let concurrency = config.sync.primary_concurrency as usize; let resource_type_owned = resource_type.to_string(); // Aggregate counters for stage_bar updates (shared across concurrent projects) let agg_fetched = Arc::new(AtomicUsize::new(0)); let agg_discussions = Arc::new(AtomicUsize::new(0)); let agg_disc_total = Arc::new(AtomicUsize::new(0)); let agg_events = Arc::new(AtomicUsize::new(0)); let agg_events_total = Arc::new(AtomicUsize::new(0)); let stage_bar = stage_bar.unwrap_or_else(ProgressBar::hidden); use futures::stream::{self, StreamExt}; let project_results: Vec> = stream::iter(projects.iter()) .map(|(local_project_id, gitlab_project_id, path)| { let client = client.clone(); let db_path = db_path.clone(); let config = config.clone(); let resource_type = resource_type_owned.clone(); let path = path.clone(); let local_project_id = *local_project_id; let gitlab_project_id = *gitlab_project_id; let stage_bar = stage_bar.clone(); let agg_fetched = Arc::clone(&agg_fetched); let agg_discussions = Arc::clone(&agg_discussions); let agg_disc_total = Arc::clone(&agg_disc_total); let agg_events = Arc::clone(&agg_events); 
let agg_events_total = Arc::clone(&agg_events_total); async move { let proj_conn = create_connection(&db_path)?; let multi = crate::cli::progress::multi(); let spinner = if !display.show_spinner { ProgressBar::hidden() } else { let s = multi.add(ProgressBar::new_spinner()); s.set_style( ProgressStyle::default_spinner() .template("{spinner:.blue} {msg}") .unwrap(), ); s.set_message(format!("Fetching {type_label} from {path}...")); s.enable_steady_tick(std::time::Duration::from_millis(100)); s }; let disc_bar = if !display.show_progress { ProgressBar::hidden() } else { let b = multi.add(ProgressBar::new(0)); b.set_style( ProgressStyle::default_bar() .template( " {spinner:.blue} {prefix:.cyan} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}", ) .unwrap() .progress_chars("=> "), ); b.set_prefix(path.clone()); b }; let spinner_clone = spinner.clone(); let disc_bar_clone = disc_bar.clone(); let stage_bar_clone = stage_bar.clone(); let agg_fetched_clone = Arc::clone(&agg_fetched); let agg_discussions_clone = Arc::clone(&agg_discussions); let agg_disc_total_clone = Arc::clone(&agg_disc_total); let agg_events_clone = Arc::clone(&agg_events); let agg_events_total_clone = Arc::clone(&agg_events_total); let path_for_cb = path.clone(); let progress_callback: crate::ingestion::ProgressCallback = if !display.show_progress { Box::new(|_| {}) } else { Box::new(move |event: ProgressEvent| match event { ProgressEvent::IssuesFetchStarted | ProgressEvent::MrsFetchStarted => { // Spinner already showing fetch message } ProgressEvent::IssuesFetchComplete { total } | ProgressEvent::MrsFetchComplete { total } => { let agg = agg_fetched_clone.fetch_add(total, Ordering::Relaxed) + total; spinner_clone.set_message(format!( "{path_for_cb}: {total} {type_label} fetched" )); stage_bar_clone.set_message(format!( "Fetching {type_label}... 
({agg} fetched across projects)" )); } ProgressEvent::IssueFetched { count } | ProgressEvent::MrFetched { count } => { spinner_clone.set_message(format!( "{path_for_cb}: {count} fetched so far..." )); } ProgressEvent::DiscussionSyncStarted { total } => { spinner_clone.finish_and_clear(); let agg_total = agg_disc_total_clone.fetch_add(total, Ordering::Relaxed) + total; disc_bar_clone.set_length(total as u64); disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100)); stage_bar_clone.set_message(format!( "Syncing discussions... (0/{agg_total})" )); } ProgressEvent::DiscussionSynced { current, total: _ } => { disc_bar_clone.set_position(current as u64); let agg = agg_discussions_clone.fetch_add(1, Ordering::Relaxed) + 1; let agg_total = agg_disc_total_clone.load(Ordering::Relaxed); stage_bar_clone.set_message(format!( "Syncing discussions... ({agg}/{agg_total})" )); } ProgressEvent::DiscussionSyncComplete => { disc_bar_clone.finish_and_clear(); } ProgressEvent::MrDiscussionSyncStarted { total } => { spinner_clone.finish_and_clear(); let agg_total = agg_disc_total_clone.fetch_add(total, Ordering::Relaxed) + total; disc_bar_clone.set_length(total as u64); disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100)); stage_bar_clone.set_message(format!( "Syncing discussions... (0/{agg_total})" )); } ProgressEvent::MrDiscussionSynced { current, total: _ } => { disc_bar_clone.set_position(current as u64); let agg = agg_discussions_clone.fetch_add(1, Ordering::Relaxed) + 1; let agg_total = agg_disc_total_clone.load(Ordering::Relaxed); stage_bar_clone.set_message(format!( "Syncing discussions... 
({agg}/{agg_total})" )); } ProgressEvent::MrDiscussionSyncComplete => { disc_bar_clone.finish_and_clear(); } ProgressEvent::ResourceEventsFetchStarted { total } => { disc_bar_clone.reset(); disc_bar_clone.set_length(total as u64); disc_bar_clone.set_style( ProgressStyle::default_bar() .template(" {spinner:.blue} {prefix:.cyan} Fetching resource events [{bar:30.cyan/dim}] {pos}/{len}") .unwrap() .progress_chars("=> "), ); disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100)); agg_events_total_clone.fetch_add(total, Ordering::Relaxed); stage_bar_clone.set_message( "Fetching resource events...".to_string() ); } ProgressEvent::ResourceEventFetched { current, total: _ } => { disc_bar_clone.set_position(current as u64); let agg = agg_events_clone.fetch_add(1, Ordering::Relaxed) + 1; let agg_total = agg_events_total_clone.load(Ordering::Relaxed); stage_bar_clone.set_message(format!( "Fetching resource events... ({agg}/{agg_total})" )); } ProgressEvent::ResourceEventsFetchComplete { .. } => { disc_bar_clone.finish_and_clear(); } }) }; let outcome = if resource_type == "issues" { let result = ingest_project_issues_with_progress( &proj_conn, &client, &config, local_project_id, gitlab_project_id, Some(progress_callback), ) .await?; spinner.finish_and_clear(); disc_bar.finish_and_clear(); ProjectIngestOutcome::Issues { path, result } } else { let result = ingest_project_merge_requests_with_progress( &proj_conn, &client, &config, local_project_id, gitlab_project_id, full, Some(progress_callback), ) .await?; spinner.finish_and_clear(); disc_bar.finish_and_clear(); ProjectIngestOutcome::Mrs { path, result } }; Ok(outcome) } }) .buffer_unordered(concurrency) .collect() .await; // Aggregate results and print per-project summaries. // Process all successes first, then return the first error (if any) // so that successful project summaries are always printed. 
let mut first_error: Option = None; for project_result in project_results { match project_result { Err(e) => { if first_error.is_none() { first_error = Some(e); } } Ok(ProjectIngestOutcome::Issues { ref path, ref result, }) => { if display.show_text { print_issue_project_summary(path, result); } total.projects_synced += 1; total.issues_fetched += result.issues_fetched; total.issues_upserted += result.issues_upserted; total.labels_created += result.labels_created; total.discussions_fetched += result.discussions_fetched; total.notes_upserted += result.notes_upserted; total.issues_synced_discussions += result.issues_synced_discussions; total.issues_skipped_discussion_sync += result.issues_skipped_discussion_sync; total.resource_events_fetched += result.resource_events_fetched; total.resource_events_failed += result.resource_events_failed; } Ok(ProjectIngestOutcome::Mrs { ref path, ref result, }) => { if display.show_text { print_mr_project_summary(path, result); } total.projects_synced += 1; total.mrs_fetched += result.mrs_fetched; total.mrs_upserted += result.mrs_upserted; total.labels_created += result.labels_created; total.assignees_linked += result.assignees_linked; total.reviewers_linked += result.reviewers_linked; total.discussions_fetched += result.discussions_fetched; total.notes_upserted += result.notes_upserted; total.diffnotes_count += result.diffnotes_count; total.mrs_synced_discussions += result.mrs_synced_discussions; total.mrs_skipped_discussion_sync += result.mrs_skipped_discussion_sync; total.resource_events_fetched += result.resource_events_fetched; total.resource_events_failed += result.resource_events_failed; } } } if let Some(e) = first_error { return Err(e); } // Lock is released on drop Ok(total) } /// Get projects to sync from database, optionally filtered. 
fn get_projects_to_sync( conn: &Connection, configured_projects: &[crate::core::config::ProjectConfig], filter: Option<&str>, ) -> Result> { // If a filter is provided, resolve it to a specific project if let Some(filter_str) = filter { let project_id = resolve_project(conn, filter_str)?; // Verify the resolved project is in our config let row: Option<(i64, String)> = conn .query_row( "SELECT gitlab_project_id, path_with_namespace FROM projects WHERE id = ?1", [project_id], |row| Ok((row.get(0)?, row.get(1)?)), ) .ok(); if let Some((gitlab_id, path)) = row { // Confirm it's a configured project if configured_projects.iter().any(|p| p.path == path) { return Ok(vec![(project_id, gitlab_id, path)]); } return Err(LoreError::Other(format!( "Project '{}' exists in database but is not in configuration", path ))); } return Err(LoreError::Other(format!( "Project '{}' not found in database", filter_str ))); } // No filter: return all configured projects let mut projects = Vec::new(); for project_config in configured_projects { let result: Option<(i64, i64)> = conn .query_row( "SELECT id, gitlab_project_id FROM projects WHERE path_with_namespace = ?", [&project_config.path], |row| Ok((row.get(0)?, row.get(1)?)), ) .ok(); if let Some((local_id, gitlab_id)) = result { projects.push((local_id, gitlab_id, project_config.path.clone())); } } Ok(projects) } /// Print summary for a single project (issues). 
///
/// Always prints the headline with the upserted issue count (plus the number
/// of newly created labels when non-zero). The discussion line and the
/// "unchanged" line are printed only when their counters are non-zero.
fn print_issue_project_summary(path: &str, result: &IngestProjectResult) {
    let label_note = match result.labels_created {
        0 => String::new(),
        created => format!(", {} new labels", created),
    };

    println!(
        " {}: {} issues fetched{}",
        style(path).cyan(),
        result.issues_upserted,
        label_note
    );

    let synced = result.issues_synced_discussions;
    if synced > 0 {
        println!(
            " {} issues -> {} discussions, {} notes",
            synced, result.discussions_fetched, result.notes_upserted
        );
    }

    let skipped = result.issues_skipped_discussion_sync;
    if skipped > 0 {
        println!(
            " {} unchanged issues (discussion sync skipped)",
            style(skipped).dim()
        );
    }
}

/// Print summary for a single project (merge requests).
///
/// Same layout as the issue summary; the headline additionally reports linked
/// assignees/reviewers when either is non-zero, and the discussion line
/// reports diff notes when there are any.
fn print_mr_project_summary(path: &str, result: &IngestMrProjectResult) {
    let label_note = match result.labels_created {
        0 => String::new(),
        created => format!(", {} new labels", created),
    };

    let nobody_linked = result.assignees_linked == 0 && result.reviewers_linked == 0;
    let people_note = if nobody_linked {
        String::new()
    } else {
        format!(
            ", {} assignees, {} reviewers",
            result.assignees_linked, result.reviewers_linked
        )
    };

    println!(
        " {}: {} MRs fetched{}{}",
        style(path).cyan(),
        result.mrs_upserted,
        label_note,
        people_note
    );

    let synced = result.mrs_synced_discussions;
    if synced > 0 {
        let diffnote_note = match result.diffnotes_count {
            0 => String::new(),
            diffnotes => format!(" ({} diff notes)", diffnotes),
        };
        println!(
            " {} MRs -> {} discussions, {} notes{}",
            synced, result.discussions_fetched, result.notes_upserted, diffnote_note
        );
    }

    let skipped = result.mrs_skipped_discussion_sync;
    if skipped > 0 {
        println!(
            " {} unchanged MRs (discussion sync skipped)",
            style(skipped).dim()
        );
    }
}

/// JSON output structures for robot mode.
#[derive(Serialize)] struct IngestJsonOutput { ok: bool, data: IngestJsonData, } #[derive(Serialize)] struct IngestJsonData { resource_type: String, projects_synced: usize, #[serde(skip_serializing_if = "Option::is_none")] issues: Option, #[serde(skip_serializing_if = "Option::is_none")] merge_requests: Option, labels_created: usize, discussions_fetched: usize, notes_upserted: usize, resource_events_fetched: usize, resource_events_failed: usize, } #[derive(Serialize)] struct IngestIssueStats { fetched: usize, upserted: usize, synced_discussions: usize, skipped_discussion_sync: usize, } #[derive(Serialize)] struct IngestMrStats { fetched: usize, upserted: usize, synced_discussions: usize, skipped_discussion_sync: usize, assignees_linked: usize, reviewers_linked: usize, diffnotes_count: usize, } /// Print final summary as JSON (robot mode). pub fn print_ingest_summary_json(result: &IngestResult) { let (issues, merge_requests) = if result.resource_type == "issues" { ( Some(IngestIssueStats { fetched: result.issues_fetched, upserted: result.issues_upserted, synced_discussions: result.issues_synced_discussions, skipped_discussion_sync: result.issues_skipped_discussion_sync, }), None, ) } else { ( None, Some(IngestMrStats { fetched: result.mrs_fetched, upserted: result.mrs_upserted, synced_discussions: result.mrs_synced_discussions, skipped_discussion_sync: result.mrs_skipped_discussion_sync, assignees_linked: result.assignees_linked, reviewers_linked: result.reviewers_linked, diffnotes_count: result.diffnotes_count, }), ) }; let output = IngestJsonOutput { ok: true, data: IngestJsonData { resource_type: result.resource_type.clone(), projects_synced: result.projects_synced, issues, merge_requests, labels_created: result.labels_created, discussions_fetched: result.discussions_fetched, notes_upserted: result.notes_upserted, resource_events_fetched: result.resource_events_fetched, resource_events_failed: result.resource_events_failed, }, }; println!("{}", 
serde_json::to_string(&output).unwrap()); } /// Print final summary. pub fn print_ingest_summary(result: &IngestResult) { println!(); if result.resource_type == "issues" { println!( "{}", style(format!( "Total: {} issues, {} discussions, {} notes", result.issues_upserted, result.discussions_fetched, result.notes_upserted )) .green() ); if result.issues_skipped_discussion_sync > 0 { println!( "{}", style(format!( "Skipped discussion sync for {} unchanged issues.", result.issues_skipped_discussion_sync )) .dim() ); } } else { let diffnotes_str = if result.diffnotes_count > 0 { format!(" ({} diff notes)", result.diffnotes_count) } else { String::new() }; println!( "{}", style(format!( "Total: {} MRs, {} discussions, {} notes{}", result.mrs_upserted, result.discussions_fetched, result.notes_upserted, diffnotes_str )) .green() ); if result.mrs_skipped_discussion_sync > 0 { println!( "{}", style(format!( "Skipped discussion sync for {} unchanged MRs.", result.mrs_skipped_discussion_sync )) .dim() ); } } if result.resource_events_fetched > 0 || result.resource_events_failed > 0 { println!( " Resource events: {} fetched{}", result.resource_events_fetched, if result.resource_events_failed > 0 { format!(", {} failed", result.resource_events_failed) } else { String::new() } ); } }