feat(sync): Instrument pipeline with tracing spans, run_id correlation, and metrics
Add end-to-end observability to the sync and ingest pipelines: Sync command: - Generate UUID-based run_id for each sync invocation, propagated through all child spans for log correlation across stages - Accept MetricsLayer reference to extract hierarchical StageTiming data after pipeline completion for robot-mode performance output - Record sync runs in DB via SyncRunRecorder (start/succeed/fail lifecycle) - Wrap entire sync execution in a root tracing span with run_id field Ingest command: - Wrap run_ingest in an instrumented root span with run_id and resource_type - Add project path prefix to discussion progress bars for multi-project clarity - Reset resource_events_synced_for_updated_at on --full re-sync Sync status: - Expand from single last_run to configurable recent runs list (default 10) - Parse and expose StageTiming metrics from stored metrics_json - Add run_id, total_items_processed, total_errors to SyncRunInfo - Add mr_count to DataSummary for complete entity coverage Orchestrator: - Add #[instrument] with structured fields to issue and MR ingestion functions - Record items_processed, items_skipped, errors on span close for MetricsLayer - Emit granular progress events (IssuesFetchStarted, IssuesFetchComplete) - Pass project_id through to drain_resource_events for scoped job claiming Document regenerator and embedding pipeline: - Add #[instrument] spans with items_processed, items_skipped, errors fields - Record final counts on span close for metrics extraction Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -5,6 +5,8 @@ use indicatif::{ProgressBar, ProgressStyle};
|
|||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use tracing::Instrument;
|
||||||
|
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
use crate::core::db::create_connection;
|
use crate::core::db::create_connection;
|
||||||
use crate::core::error::{LoreError, Result};
|
use crate::core::error::{LoreError, Result};
|
||||||
@@ -111,6 +113,24 @@ pub async fn run_ingest(
|
|||||||
force: bool,
|
force: bool,
|
||||||
full: bool,
|
full: bool,
|
||||||
display: IngestDisplay,
|
display: IngestDisplay,
|
||||||
|
) -> Result<IngestResult> {
|
||||||
|
let run_id = uuid::Uuid::new_v4().simple().to_string();
|
||||||
|
let run_id = &run_id[..8];
|
||||||
|
let span = tracing::info_span!("ingest", %run_id, %resource_type);
|
||||||
|
|
||||||
|
run_ingest_inner(config, resource_type, project_filter, force, full, display)
|
||||||
|
.instrument(span)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inner implementation of run_ingest, instrumented with a root span.
|
||||||
|
async fn run_ingest_inner(
|
||||||
|
config: &Config,
|
||||||
|
resource_type: &str,
|
||||||
|
project_filter: Option<&str>,
|
||||||
|
force: bool,
|
||||||
|
full: bool,
|
||||||
|
display: IngestDisplay,
|
||||||
) -> Result<IngestResult> {
|
) -> Result<IngestResult> {
|
||||||
// Validate resource type early
|
// Validate resource type early
|
||||||
if resource_type != "issues" && resource_type != "mrs" {
|
if resource_type != "issues" && resource_type != "mrs" {
|
||||||
@@ -162,15 +182,15 @@ pub async fn run_ingest(
|
|||||||
}
|
}
|
||||||
for (local_project_id, _, path) in &projects {
|
for (local_project_id, _, path) in &projects {
|
||||||
if resource_type == "issues" {
|
if resource_type == "issues" {
|
||||||
// Reset issue discussion watermarks first so discussions get re-synced
|
// Reset issue discussion and resource event watermarks so everything gets re-synced
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE issues SET discussions_synced_for_updated_at = NULL WHERE project_id = ?",
|
"UPDATE issues SET discussions_synced_for_updated_at = NULL, resource_events_synced_for_updated_at = NULL WHERE project_id = ?",
|
||||||
[*local_project_id],
|
[*local_project_id],
|
||||||
)?;
|
)?;
|
||||||
} else if resource_type == "mrs" {
|
} else if resource_type == "mrs" {
|
||||||
// Reset MR discussion watermarks
|
// Reset MR discussion and resource event watermarks
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE merge_requests SET discussions_synced_for_updated_at = NULL WHERE project_id = ?",
|
"UPDATE merge_requests SET discussions_synced_for_updated_at = NULL, resource_events_synced_for_updated_at = NULL WHERE project_id = ?",
|
||||||
[*local_project_id],
|
[*local_project_id],
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@@ -255,11 +275,12 @@ pub async fn run_ingest(
|
|||||||
b.set_style(
|
b.set_style(
|
||||||
ProgressStyle::default_bar()
|
ProgressStyle::default_bar()
|
||||||
.template(
|
.template(
|
||||||
" {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}",
|
" {spinner:.blue} {prefix:.cyan} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}",
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.progress_chars("=> "),
|
.progress_chars("=> "),
|
||||||
);
|
);
|
||||||
|
b.set_prefix(path.clone());
|
||||||
b
|
b
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -296,7 +317,7 @@ pub async fn run_ingest(
|
|||||||
disc_bar_clone.set_length(total as u64);
|
disc_bar_clone.set_length(total as u64);
|
||||||
disc_bar_clone.set_style(
|
disc_bar_clone.set_style(
|
||||||
ProgressStyle::default_bar()
|
ProgressStyle::default_bar()
|
||||||
.template(" {spinner:.blue} Fetching resource events [{bar:30.cyan/dim}] {pos}/{len}")
|
.template(" {spinner:.blue} {prefix:.cyan} Fetching resource events [{bar:30.cyan/dim}] {pos}/{len}")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.progress_chars("=> "),
|
.progress_chars("=> "),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -3,10 +3,12 @@
|
|||||||
use console::style;
|
use console::style;
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
use tracing::Instrument;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
use crate::core::error::Result;
|
use crate::core::error::Result;
|
||||||
|
use crate::core::metrics::{MetricsLayer, StageTiming};
|
||||||
|
|
||||||
use super::embed::run_embed;
|
use super::embed::run_embed;
|
||||||
use super::generate_docs::run_generate_docs;
|
use super::generate_docs::run_generate_docs;
|
||||||
@@ -26,6 +28,8 @@ pub struct SyncOptions {
|
|||||||
/// Result of the sync command.
|
/// Result of the sync command.
|
||||||
#[derive(Debug, Default, Serialize)]
|
#[derive(Debug, Default, Serialize)]
|
||||||
pub struct SyncResult {
|
pub struct SyncResult {
|
||||||
|
#[serde(skip)]
|
||||||
|
pub run_id: String,
|
||||||
pub issues_updated: usize,
|
pub issues_updated: usize,
|
||||||
pub mrs_updated: usize,
|
pub mrs_updated: usize,
|
||||||
pub discussions_fetched: usize,
|
pub discussions_fetched: usize,
|
||||||
@@ -52,8 +56,30 @@ fn stage_spinner(stage: u8, total: u8, msg: &str, robot_mode: bool) -> ProgressB
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Run the full sync pipeline: ingest -> generate-docs -> embed.
|
/// Run the full sync pipeline: ingest -> generate-docs -> embed.
|
||||||
pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResult> {
|
///
|
||||||
let mut result = SyncResult::default();
|
/// `run_id` is an optional correlation ID for log/metrics tracing.
|
||||||
|
/// When called from `handle_sync_cmd`, this should be the same ID
|
||||||
|
/// stored in the `sync_runs` table so logs and DB records correlate.
|
||||||
|
pub async fn run_sync(
|
||||||
|
config: &Config,
|
||||||
|
options: SyncOptions,
|
||||||
|
run_id: Option<&str>,
|
||||||
|
) -> Result<SyncResult> {
|
||||||
|
let generated_id;
|
||||||
|
let run_id = match run_id {
|
||||||
|
Some(id) => id,
|
||||||
|
None => {
|
||||||
|
generated_id = uuid::Uuid::new_v4().simple().to_string();
|
||||||
|
&generated_id[..8]
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let span = tracing::info_span!("sync", %run_id);
|
||||||
|
|
||||||
|
async move {
|
||||||
|
let mut result = SyncResult {
|
||||||
|
run_id: run_id.to_string(),
|
||||||
|
..SyncResult::default()
|
||||||
|
};
|
||||||
|
|
||||||
let ingest_display = if options.robot_mode {
|
let ingest_display = if options.robot_mode {
|
||||||
IngestDisplay::silent()
|
IngestDisplay::silent()
|
||||||
@@ -176,9 +202,16 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
|
|||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
.instrument(span)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
/// Print human-readable sync summary.
|
/// Print human-readable sync summary.
|
||||||
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
|
pub fn print_sync(
|
||||||
|
result: &SyncResult,
|
||||||
|
elapsed: std::time::Duration,
|
||||||
|
metrics: Option<&MetricsLayer>,
|
||||||
|
) {
|
||||||
println!("{} Sync complete:", style("done").green().bold(),);
|
println!("{} Sync complete:", style("done").green().bold(),);
|
||||||
println!(" Issues updated: {}", result.issues_updated);
|
println!(" Issues updated: {}", result.issues_updated);
|
||||||
println!(" MRs updated: {}", result.mrs_updated);
|
println!(" MRs updated: {}", result.mrs_updated);
|
||||||
@@ -204,6 +237,65 @@ pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
|
|||||||
);
|
);
|
||||||
println!(" Documents embedded: {}", result.documents_embedded);
|
println!(" Documents embedded: {}", result.documents_embedded);
|
||||||
println!(" Elapsed: {:.1}s", elapsed.as_secs_f64());
|
println!(" Elapsed: {:.1}s", elapsed.as_secs_f64());
|
||||||
|
|
||||||
|
// Print per-stage timing breakdown if metrics are available
|
||||||
|
if let Some(metrics) = metrics {
|
||||||
|
let stages = metrics.extract_timings();
|
||||||
|
if !stages.is_empty() {
|
||||||
|
print_timing_summary(&stages);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print per-stage timing breakdown for interactive users.
|
||||||
|
fn print_timing_summary(stages: &[StageTiming]) {
|
||||||
|
println!();
|
||||||
|
println!("{}", style("Stage timing:").dim());
|
||||||
|
for stage in stages {
|
||||||
|
for sub in &stage.sub_stages {
|
||||||
|
print_stage_line(sub, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print a single stage timing line with indentation.
|
||||||
|
fn print_stage_line(stage: &StageTiming, depth: usize) {
|
||||||
|
let indent = " ".repeat(depth);
|
||||||
|
let name = if let Some(ref project) = stage.project {
|
||||||
|
format!("{} ({})", stage.name, project)
|
||||||
|
} else {
|
||||||
|
stage.name.clone()
|
||||||
|
};
|
||||||
|
let pad_width = 30_usize.saturating_sub(indent.len() + name.len());
|
||||||
|
let dots = ".".repeat(pad_width.max(2));
|
||||||
|
|
||||||
|
let mut suffix = String::new();
|
||||||
|
if stage.items_processed > 0 {
|
||||||
|
suffix.push_str(&format!("{} items", stage.items_processed));
|
||||||
|
}
|
||||||
|
if stage.errors > 0 {
|
||||||
|
if !suffix.is_empty() {
|
||||||
|
suffix.push_str(", ");
|
||||||
|
}
|
||||||
|
suffix.push_str(&format!("{} errors", stage.errors));
|
||||||
|
}
|
||||||
|
if stage.rate_limit_hits > 0 {
|
||||||
|
if !suffix.is_empty() {
|
||||||
|
suffix.push_str(", ");
|
||||||
|
}
|
||||||
|
suffix.push_str(&format!("{} rate limits", stage.rate_limit_hits));
|
||||||
|
}
|
||||||
|
|
||||||
|
let time_str = format!("{:.1}s", stage.elapsed_ms as f64 / 1000.0);
|
||||||
|
if suffix.is_empty() {
|
||||||
|
println!("{indent}{name} {dots} {time_str}");
|
||||||
|
} else {
|
||||||
|
println!("{indent}{name} {dots} {time_str} ({suffix})");
|
||||||
|
}
|
||||||
|
|
||||||
|
for sub in &stage.sub_stages {
|
||||||
|
print_stage_line(sub, depth + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// JSON output for sync.
|
/// JSON output for sync.
|
||||||
@@ -216,15 +308,23 @@ struct SyncJsonOutput<'a> {
|
|||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct SyncMeta {
|
struct SyncMeta {
|
||||||
|
run_id: String,
|
||||||
elapsed_ms: u64,
|
elapsed_ms: u64,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
stages: Vec<StageTiming>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Print JSON robot-mode sync output.
|
/// Print JSON robot-mode sync output with optional metrics.
|
||||||
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) {
|
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&MetricsLayer>) {
|
||||||
|
let stages = metrics.map_or_else(Vec::new, MetricsLayer::extract_timings);
|
||||||
let output = SyncJsonOutput {
|
let output = SyncJsonOutput {
|
||||||
ok: true,
|
ok: true,
|
||||||
data: result,
|
data: result,
|
||||||
meta: SyncMeta { elapsed_ms },
|
meta: SyncMeta {
|
||||||
|
run_id: result.run_id.clone(),
|
||||||
|
elapsed_ms,
|
||||||
|
stages,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
println!("{}", serde_json::to_string(&output).unwrap());
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,8 +7,11 @@ use serde::Serialize;
|
|||||||
use crate::Config;
|
use crate::Config;
|
||||||
use crate::core::db::create_connection;
|
use crate::core::db::create_connection;
|
||||||
use crate::core::error::Result;
|
use crate::core::error::Result;
|
||||||
|
use crate::core::metrics::StageTiming;
|
||||||
use crate::core::paths::get_db_path;
|
use crate::core::paths::get_db_path;
|
||||||
use crate::core::time::ms_to_iso;
|
use crate::core::time::{format_full_datetime, ms_to_iso};
|
||||||
|
|
||||||
|
const RECENT_RUNS_LIMIT: usize = 10;
|
||||||
|
|
||||||
/// Sync run information.
|
/// Sync run information.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@@ -19,6 +22,10 @@ pub struct SyncRunInfo {
|
|||||||
pub status: String,
|
pub status: String,
|
||||||
pub command: String,
|
pub command: String,
|
||||||
pub error: Option<String>,
|
pub error: Option<String>,
|
||||||
|
pub run_id: Option<String>,
|
||||||
|
pub total_items_processed: i64,
|
||||||
|
pub total_errors: i64,
|
||||||
|
pub stages: Option<Vec<StageTiming>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cursor position information.
|
/// Cursor position information.
|
||||||
@@ -34,6 +41,7 @@ pub struct CursorInfo {
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct DataSummary {
|
pub struct DataSummary {
|
||||||
pub issue_count: i64,
|
pub issue_count: i64,
|
||||||
|
pub mr_count: i64,
|
||||||
pub discussion_count: i64,
|
pub discussion_count: i64,
|
||||||
pub note_count: i64,
|
pub note_count: i64,
|
||||||
pub system_note_count: i64,
|
pub system_note_count: i64,
|
||||||
@@ -42,7 +50,7 @@ pub struct DataSummary {
|
|||||||
/// Complete sync status result.
|
/// Complete sync status result.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct SyncStatusResult {
|
pub struct SyncStatusResult {
|
||||||
pub last_run: Option<SyncRunInfo>,
|
pub runs: Vec<SyncRunInfo>,
|
||||||
pub cursors: Vec<CursorInfo>,
|
pub cursors: Vec<CursorInfo>,
|
||||||
pub summary: DataSummary,
|
pub summary: DataSummary,
|
||||||
}
|
}
|
||||||
@@ -52,27 +60,33 @@ pub fn run_sync_status(config: &Config) -> Result<SyncStatusResult> {
|
|||||||
let db_path = get_db_path(config.storage.db_path.as_deref());
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
||||||
let conn = create_connection(&db_path)?;
|
let conn = create_connection(&db_path)?;
|
||||||
|
|
||||||
let last_run = get_last_sync_run(&conn)?;
|
let runs = get_recent_sync_runs(&conn, RECENT_RUNS_LIMIT)?;
|
||||||
let cursors = get_cursor_positions(&conn)?;
|
let cursors = get_cursor_positions(&conn)?;
|
||||||
let summary = get_data_summary(&conn)?;
|
let summary = get_data_summary(&conn)?;
|
||||||
|
|
||||||
Ok(SyncStatusResult {
|
Ok(SyncStatusResult {
|
||||||
last_run,
|
runs,
|
||||||
cursors,
|
cursors,
|
||||||
summary,
|
summary,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the most recent sync run.
|
/// Get the most recent sync runs.
|
||||||
fn get_last_sync_run(conn: &Connection) -> Result<Option<SyncRunInfo>> {
|
fn get_recent_sync_runs(conn: &Connection, limit: usize) -> Result<Vec<SyncRunInfo>> {
|
||||||
let mut stmt = conn.prepare(
|
let mut stmt = conn.prepare(
|
||||||
"SELECT id, started_at, finished_at, status, command, error
|
"SELECT id, started_at, finished_at, status, command, error,
|
||||||
|
run_id, total_items_processed, total_errors, metrics_json
|
||||||
FROM sync_runs
|
FROM sync_runs
|
||||||
ORDER BY started_at DESC
|
ORDER BY started_at DESC
|
||||||
LIMIT 1",
|
LIMIT ?1",
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let result = stmt.query_row([], |row| {
|
let runs: std::result::Result<Vec<_>, _> = stmt
|
||||||
|
.query_map([limit as i64], |row| {
|
||||||
|
let metrics_json: Option<String> = row.get(9)?;
|
||||||
|
let stages: Option<Vec<StageTiming>> =
|
||||||
|
metrics_json.and_then(|json| serde_json::from_str(&json).ok());
|
||||||
|
|
||||||
Ok(SyncRunInfo {
|
Ok(SyncRunInfo {
|
||||||
id: row.get(0)?,
|
id: row.get(0)?,
|
||||||
started_at: row.get(1)?,
|
started_at: row.get(1)?,
|
||||||
@@ -80,14 +94,15 @@ fn get_last_sync_run(conn: &Connection) -> Result<Option<SyncRunInfo>> {
|
|||||||
status: row.get(3)?,
|
status: row.get(3)?,
|
||||||
command: row.get(4)?,
|
command: row.get(4)?,
|
||||||
error: row.get(5)?,
|
error: row.get(5)?,
|
||||||
|
run_id: row.get(6)?,
|
||||||
|
total_items_processed: row.get::<_, Option<i64>>(7)?.unwrap_or(0),
|
||||||
|
total_errors: row.get::<_, Option<i64>>(8)?.unwrap_or(0),
|
||||||
|
stages,
|
||||||
})
|
})
|
||||||
});
|
})?
|
||||||
|
.collect();
|
||||||
|
|
||||||
match result {
|
Ok(runs?)
|
||||||
Ok(info) => Ok(Some(info)),
|
|
||||||
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
|
|
||||||
Err(e) => Err(e.into()),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get cursor positions for all projects/resource types.
|
/// Get cursor positions for all projects/resource types.
|
||||||
@@ -119,6 +134,10 @@ fn get_data_summary(conn: &Connection) -> Result<DataSummary> {
|
|||||||
.query_row("SELECT COUNT(*) FROM issues", [], |row| row.get(0))
|
.query_row("SELECT COUNT(*) FROM issues", [], |row| row.get(0))
|
||||||
.unwrap_or(0);
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
let mr_count: i64 = conn
|
||||||
|
.query_row("SELECT COUNT(*) FROM merge_requests", [], |row| row.get(0))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
let discussion_count: i64 = conn
|
let discussion_count: i64 = conn
|
||||||
.query_row("SELECT COUNT(*) FROM discussions", [], |row| row.get(0))
|
.query_row("SELECT COUNT(*) FROM discussions", [], |row| row.get(0))
|
||||||
.unwrap_or(0);
|
.unwrap_or(0);
|
||||||
@@ -133,6 +152,7 @@ fn get_data_summary(conn: &Connection) -> Result<DataSummary> {
|
|||||||
|
|
||||||
Ok(DataSummary {
|
Ok(DataSummary {
|
||||||
issue_count,
|
issue_count,
|
||||||
|
mr_count,
|
||||||
discussion_count,
|
discussion_count,
|
||||||
note_count,
|
note_count,
|
||||||
system_note_count,
|
system_note_count,
|
||||||
@@ -149,15 +169,17 @@ fn format_duration(ms: i64) -> String {
|
|||||||
format!("{}h {}m {}s", hours, minutes % 60, seconds % 60)
|
format!("{}h {}m {}s", hours, minutes % 60, seconds % 60)
|
||||||
} else if minutes > 0 {
|
} else if minutes > 0 {
|
||||||
format!("{}m {}s", minutes, seconds % 60)
|
format!("{}m {}s", minutes, seconds % 60)
|
||||||
|
} else if ms >= 1000 {
|
||||||
|
format!("{:.1}s", ms as f64 / 1000.0)
|
||||||
} else {
|
} else {
|
||||||
format!("{}s", seconds)
|
format!("{}ms", ms)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format number with thousands separators.
|
/// Format number with thousands separators.
|
||||||
fn format_number(n: i64) -> String {
|
fn format_number(n: i64) -> String {
|
||||||
let is_negative = n < 0;
|
let is_negative = n < 0;
|
||||||
let abs_n = n.abs();
|
let abs_n = n.unsigned_abs();
|
||||||
let s = abs_n.to_string();
|
let s = abs_n.to_string();
|
||||||
let chars: Vec<char> = s.chars().collect();
|
let chars: Vec<char> = s.chars().collect();
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
@@ -176,7 +198,10 @@ fn format_number(n: i64) -> String {
|
|||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// JSON output structures for robot mode.
|
// ============================================================================
|
||||||
|
// JSON output structures for robot mode
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct SyncStatusJsonOutput {
|
struct SyncStatusJsonOutput {
|
||||||
ok: bool,
|
ok: bool,
|
||||||
@@ -185,7 +210,7 @@ struct SyncStatusJsonOutput {
|
|||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct SyncStatusJsonData {
|
struct SyncStatusJsonData {
|
||||||
last_sync: Option<SyncRunJsonInfo>,
|
runs: Vec<SyncRunJsonInfo>,
|
||||||
cursors: Vec<CursorJsonInfo>,
|
cursors: Vec<CursorJsonInfo>,
|
||||||
summary: SummaryJsonInfo,
|
summary: SummaryJsonInfo,
|
||||||
}
|
}
|
||||||
@@ -201,7 +226,13 @@ struct SyncRunJsonInfo {
|
|||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
duration_ms: Option<i64>,
|
duration_ms: Option<i64>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
run_id: Option<String>,
|
||||||
|
total_items_processed: i64,
|
||||||
|
total_errors: i64,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
error: Option<String>,
|
error: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
stages: Option<Vec<StageTiming>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
@@ -217,6 +248,7 @@ struct CursorJsonInfo {
|
|||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct SummaryJsonInfo {
|
struct SummaryJsonInfo {
|
||||||
issues: i64,
|
issues: i64,
|
||||||
|
merge_requests: i64,
|
||||||
discussions: i64,
|
discussions: i64,
|
||||||
notes: i64,
|
notes: i64,
|
||||||
system_notes: i64,
|
system_notes: i64,
|
||||||
@@ -224,7 +256,10 @@ struct SummaryJsonInfo {
|
|||||||
|
|
||||||
/// Print sync status as JSON (robot mode).
|
/// Print sync status as JSON (robot mode).
|
||||||
pub fn print_sync_status_json(result: &SyncStatusResult) {
|
pub fn print_sync_status_json(result: &SyncStatusResult) {
|
||||||
let last_sync = result.last_run.as_ref().map(|run| {
|
let runs = result
|
||||||
|
.runs
|
||||||
|
.iter()
|
||||||
|
.map(|run| {
|
||||||
let duration_ms = run.finished_at.map(|f| f - run.started_at);
|
let duration_ms = run.finished_at.map(|f| f - run.started_at);
|
||||||
SyncRunJsonInfo {
|
SyncRunJsonInfo {
|
||||||
id: run.id,
|
id: run.id,
|
||||||
@@ -233,9 +268,14 @@ pub fn print_sync_status_json(result: &SyncStatusResult) {
|
|||||||
started_at: ms_to_iso(run.started_at),
|
started_at: ms_to_iso(run.started_at),
|
||||||
completed_at: run.finished_at.map(ms_to_iso),
|
completed_at: run.finished_at.map(ms_to_iso),
|
||||||
duration_ms,
|
duration_ms,
|
||||||
|
run_id: run.run_id.clone(),
|
||||||
|
total_items_processed: run.total_items_processed,
|
||||||
|
total_errors: run.total_errors,
|
||||||
error: run.error.clone(),
|
error: run.error.clone(),
|
||||||
|
stages: run.stages.clone(),
|
||||||
}
|
}
|
||||||
});
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
let cursors = result
|
let cursors = result
|
||||||
.cursors
|
.cursors
|
||||||
@@ -251,10 +291,11 @@ pub fn print_sync_status_json(result: &SyncStatusResult) {
|
|||||||
let output = SyncStatusJsonOutput {
|
let output = SyncStatusJsonOutput {
|
||||||
ok: true,
|
ok: true,
|
||||||
data: SyncStatusJsonData {
|
data: SyncStatusJsonData {
|
||||||
last_sync,
|
runs,
|
||||||
cursors,
|
cursors,
|
||||||
summary: SummaryJsonInfo {
|
summary: SummaryJsonInfo {
|
||||||
issues: result.summary.issue_count,
|
issues: result.summary.issue_count,
|
||||||
|
merge_requests: result.summary.mr_count,
|
||||||
discussions: result.summary.discussion_count,
|
discussions: result.summary.discussion_count,
|
||||||
notes: result.summary.note_count - result.summary.system_note_count,
|
notes: result.summary.note_count - result.summary.system_note_count,
|
||||||
system_notes: result.summary.system_note_count,
|
system_notes: result.summary.system_note_count,
|
||||||
@@ -265,41 +306,25 @@ pub fn print_sync_status_json(result: &SyncStatusResult) {
|
|||||||
println!("{}", serde_json::to_string(&output).unwrap());
|
println!("{}", serde_json::to_string(&output).unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Human-readable output
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
/// Print sync status result.
|
/// Print sync status result.
|
||||||
pub fn print_sync_status(result: &SyncStatusResult) {
|
pub fn print_sync_status(result: &SyncStatusResult) {
|
||||||
// Last Sync section
|
// Recent Runs section
|
||||||
println!("{}", style("Last Sync").bold().underlined());
|
println!("{}", style("Recent Sync Runs").bold().underlined());
|
||||||
println!();
|
println!();
|
||||||
|
|
||||||
match &result.last_run {
|
if result.runs.is_empty() {
|
||||||
Some(run) => {
|
|
||||||
let status_styled = match run.status.as_str() {
|
|
||||||
"succeeded" => style(&run.status).green(),
|
|
||||||
"failed" => style(&run.status).red(),
|
|
||||||
"running" => style(&run.status).yellow(),
|
|
||||||
_ => style(&run.status).dim(),
|
|
||||||
};
|
|
||||||
|
|
||||||
println!(" Status: {}", status_styled);
|
|
||||||
println!(" Command: {}", run.command);
|
|
||||||
println!(" Started: {}", ms_to_iso(run.started_at));
|
|
||||||
|
|
||||||
if let Some(finished) = run.finished_at {
|
|
||||||
println!(" Completed: {}", ms_to_iso(finished));
|
|
||||||
let duration = finished - run.started_at;
|
|
||||||
println!(" Duration: {}", format_duration(duration));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(error) = &run.error {
|
|
||||||
println!(" Error: {}", style(error).red());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
println!(" {}", style("No sync runs recorded yet.").dim());
|
println!(" {}", style("No sync runs recorded yet.").dim());
|
||||||
println!(
|
println!(
|
||||||
" {}",
|
" {}",
|
||||||
style("Run 'lore ingest --type=issues' to start.").dim()
|
style("Run 'lore sync' or 'lore ingest' to start.").dim()
|
||||||
);
|
);
|
||||||
|
} else {
|
||||||
|
for run in &result.runs {
|
||||||
|
print_run_line(run);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -344,6 +369,10 @@ pub fn print_sync_status(result: &SyncStatusResult) {
|
|||||||
" Issues: {}",
|
" Issues: {}",
|
||||||
style(format_number(result.summary.issue_count)).bold()
|
style(format_number(result.summary.issue_count)).bold()
|
||||||
);
|
);
|
||||||
|
println!(
|
||||||
|
" MRs: {}",
|
||||||
|
style(format_number(result.summary.mr_count)).bold()
|
||||||
|
);
|
||||||
println!(
|
println!(
|
||||||
" Discussions: {}",
|
" Discussions: {}",
|
||||||
style(format_number(result.summary.discussion_count)).bold()
|
style(format_number(result.summary.discussion_count)).bold()
|
||||||
@@ -361,14 +390,63 @@ pub fn print_sync_status(result: &SyncStatusResult) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Print a single run as a compact one-liner.
|
||||||
|
fn print_run_line(run: &SyncRunInfo) {
|
||||||
|
let status_styled = match run.status.as_str() {
|
||||||
|
"succeeded" => style(&run.status).green(),
|
||||||
|
"failed" => style(&run.status).red(),
|
||||||
|
"running" => style(&run.status).yellow(),
|
||||||
|
_ => style(&run.status).dim(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let run_label = run
|
||||||
|
.run_id
|
||||||
|
.as_deref()
|
||||||
|
.map_or_else(|| format!("#{}", run.id), |id| format!("Run {id}"));
|
||||||
|
|
||||||
|
let duration = run.finished_at.map(|f| format_duration(f - run.started_at));
|
||||||
|
|
||||||
|
let time = format_full_datetime(run.started_at);
|
||||||
|
|
||||||
|
let mut parts = vec![
|
||||||
|
format!("{}", style(run_label).bold()),
|
||||||
|
format!("{status_styled}"),
|
||||||
|
format!("{}", style(&run.command).dim()),
|
||||||
|
time,
|
||||||
|
];
|
||||||
|
|
||||||
|
if let Some(d) = duration {
|
||||||
|
parts.push(d);
|
||||||
|
} else {
|
||||||
|
parts.push("in progress".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if run.total_items_processed > 0 {
|
||||||
|
parts.push(format!("{} items", run.total_items_processed));
|
||||||
|
}
|
||||||
|
|
||||||
|
if run.total_errors > 0 {
|
||||||
|
parts.push(format!(
|
||||||
|
"{}",
|
||||||
|
style(format!("{} errors", run.total_errors)).red()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(" {}", parts.join(" | "));
|
||||||
|
|
||||||
|
if let Some(error) = &run.error {
|
||||||
|
println!(" {}", style(error).red());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn format_duration_handles_seconds() {
|
fn format_duration_handles_seconds() {
|
||||||
assert_eq!(format_duration(5_000), "5s");
|
assert_eq!(format_duration(5_000), "5.0s");
|
||||||
assert_eq!(format_duration(59_000), "59s");
|
assert_eq!(format_duration(59_000), "59.0s");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -385,6 +463,12 @@ mod tests {
|
|||||||
assert_eq!(format_duration(3_723_000), "1h 2m 3s");
|
assert_eq!(format_duration(3_723_000), "1h 2m 3s");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn format_duration_handles_milliseconds() {
|
||||||
|
assert_eq!(format_duration(500), "500ms");
|
||||||
|
assert_eq!(format_duration(0), "0ms");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn format_number_adds_thousands_separators() {
|
fn format_number_adds_thousands_separators() {
|
||||||
assert_eq!(format_number(1000), "1,000");
|
assert_eq!(format_number(1000), "1,000");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use rusqlite::OptionalExtension;
|
use rusqlite::OptionalExtension;
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, instrument, warn};
|
||||||
|
|
||||||
use crate::core::error::Result;
|
use crate::core::error::Result;
|
||||||
use crate::documents::{
|
use crate::documents::{
|
||||||
@@ -21,6 +21,7 @@ pub struct RegenerateResult {
|
|||||||
///
|
///
|
||||||
/// Uses per-item error handling (fail-soft) and drains the queue completely
|
/// Uses per-item error handling (fail-soft) and drains the queue completely
|
||||||
/// via a bounded batch loop. Each dirty item is processed independently.
|
/// via a bounded batch loop. Each dirty item is processed independently.
|
||||||
|
#[instrument(skip(conn), fields(items_processed, items_skipped, errors))]
|
||||||
pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult> {
|
pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult> {
|
||||||
let mut result = RegenerateResult::default();
|
let mut result = RegenerateResult::default();
|
||||||
|
|
||||||
@@ -61,6 +62,10 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult>
|
|||||||
"Document regeneration complete"
|
"Document regeneration complete"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
tracing::Span::current().record("items_processed", result.regenerated);
|
||||||
|
tracing::Span::current().record("items_skipped", result.unchanged);
|
||||||
|
tracing::Span::current().record("errors", result.errored);
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -282,6 +287,7 @@ mod tests {
|
|||||||
updated_at INTEGER NOT NULL,
|
updated_at INTEGER NOT NULL,
|
||||||
last_seen_at INTEGER NOT NULL,
|
last_seen_at INTEGER NOT NULL,
|
||||||
discussions_synced_for_updated_at INTEGER,
|
discussions_synced_for_updated_at INTEGER,
|
||||||
|
resource_events_synced_for_updated_at INTEGER,
|
||||||
web_url TEXT,
|
web_url TEXT,
|
||||||
raw_payload_id INTEGER
|
raw_payload_id INTEGER
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::collections::HashSet;
|
|||||||
|
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
use tracing::{info, warn};
|
use tracing::{info, instrument, warn};
|
||||||
|
|
||||||
use crate::core::error::Result;
|
use crate::core::error::Result;
|
||||||
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
|
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
|
||||||
@@ -37,6 +37,7 @@ struct ChunkWork {
|
|||||||
///
|
///
|
||||||
/// Processes batches of BATCH_SIZE texts per Ollama API call.
|
/// Processes batches of BATCH_SIZE texts per Ollama API call.
|
||||||
/// Uses keyset pagination over documents (DB_PAGE_SIZE per page).
|
/// Uses keyset pagination over documents (DB_PAGE_SIZE per page).
|
||||||
|
#[instrument(skip(conn, client, progress_callback), fields(%model_name, items_processed, items_skipped, errors))]
|
||||||
pub async fn embed_documents(
|
pub async fn embed_documents(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
client: &OllamaClient,
|
client: &OllamaClient,
|
||||||
@@ -87,7 +88,7 @@ pub async fn embed_documents(
|
|||||||
// Overflow guard: skip documents that produce too many chunks.
|
// Overflow guard: skip documents that produce too many chunks.
|
||||||
// Must run BEFORE clear_document_embeddings so existing embeddings
|
// Must run BEFORE clear_document_embeddings so existing embeddings
|
||||||
// are preserved when we skip.
|
// are preserved when we skip.
|
||||||
if total_chunks as i64 >= CHUNK_ROWID_MULTIPLIER {
|
if total_chunks as i64 > CHUNK_ROWID_MULTIPLIER {
|
||||||
warn!(
|
warn!(
|
||||||
doc_id = doc.document_id,
|
doc_id = doc.document_id,
|
||||||
chunk_count = total_chunks,
|
chunk_count = total_chunks,
|
||||||
@@ -295,6 +296,10 @@ pub async fn embed_documents(
|
|||||||
"Embedding pipeline complete"
|
"Embedding pipeline complete"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
tracing::Span::current().record("items_processed", result.embedded);
|
||||||
|
tracing::Span::current().record("items_skipped", result.skipped);
|
||||||
|
tracing::Span::current().record("errors", result.failed);
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,11 +7,11 @@
|
|||||||
|
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use tracing::{debug, info, warn};
|
use tracing::{debug, info, instrument, warn};
|
||||||
|
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
use crate::core::dependent_queue::{
|
use crate::core::dependent_queue::{
|
||||||
claim_jobs, complete_job, count_pending_jobs, enqueue_job, fail_job, reclaim_stale_locks,
|
claim_jobs, complete_job, count_claimable_jobs, enqueue_job, fail_job, reclaim_stale_locks,
|
||||||
};
|
};
|
||||||
use crate::core::error::Result;
|
use crate::core::error::Result;
|
||||||
use crate::gitlab::GitLabClient;
|
use crate::gitlab::GitLabClient;
|
||||||
@@ -108,6 +108,10 @@ pub async fn ingest_project_issues(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Ingest all issues and their discussions for a project with progress reporting.
|
/// Ingest all issues and their discussions for a project with progress reporting.
|
||||||
|
#[instrument(
|
||||||
|
skip(conn, client, config, progress),
|
||||||
|
fields(project_id, gitlab_project_id, items_processed, items_skipped, errors)
|
||||||
|
)]
|
||||||
pub async fn ingest_project_issues_with_progress(
|
pub async fn ingest_project_issues_with_progress(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
client: &GitLabClient,
|
client: &GitLabClient,
|
||||||
@@ -124,12 +128,17 @@ pub async fn ingest_project_issues_with_progress(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Step 1: Ingest issues
|
// Step 1: Ingest issues
|
||||||
|
emit(ProgressEvent::IssuesFetchStarted);
|
||||||
let issue_result = ingest_issues(conn, client, config, project_id, gitlab_project_id).await?;
|
let issue_result = ingest_issues(conn, client, config, project_id, gitlab_project_id).await?;
|
||||||
|
|
||||||
result.issues_fetched = issue_result.fetched;
|
result.issues_fetched = issue_result.fetched;
|
||||||
result.issues_upserted = issue_result.upserted;
|
result.issues_upserted = issue_result.upserted;
|
||||||
result.labels_created = issue_result.labels_created;
|
result.labels_created = issue_result.labels_created;
|
||||||
|
|
||||||
|
emit(ProgressEvent::IssuesFetchComplete {
|
||||||
|
total: result.issues_fetched,
|
||||||
|
});
|
||||||
|
|
||||||
// Step 2: Sync discussions for issues that need it
|
// Step 2: Sync discussions for issues that need it
|
||||||
let issues_needing_sync = issue_result.issues_needing_discussion_sync;
|
let issues_needing_sync = issue_result.issues_needing_discussion_sync;
|
||||||
|
|
||||||
@@ -189,8 +198,15 @@ pub async fn ingest_project_issues_with_progress(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Drain the queue
|
// Drain the queue
|
||||||
let drain_result =
|
let drain_result = drain_resource_events(
|
||||||
drain_resource_events(conn, client, config, gitlab_project_id, &progress).await?;
|
conn,
|
||||||
|
client,
|
||||||
|
config,
|
||||||
|
project_id,
|
||||||
|
gitlab_project_id,
|
||||||
|
&progress,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
result.resource_events_fetched = drain_result.fetched;
|
result.resource_events_fetched = drain_result.fetched;
|
||||||
result.resource_events_failed = drain_result.failed;
|
result.resource_events_failed = drain_result.failed;
|
||||||
}
|
}
|
||||||
@@ -208,6 +224,10 @@ pub async fn ingest_project_issues_with_progress(
|
|||||||
"Project ingestion complete"
|
"Project ingestion complete"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
tracing::Span::current().record("items_processed", result.issues_upserted);
|
||||||
|
tracing::Span::current().record("items_skipped", result.issues_skipped_discussion_sync);
|
||||||
|
tracing::Span::current().record("errors", result.resource_events_failed);
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -280,6 +300,10 @@ pub async fn ingest_project_merge_requests(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Ingest all merge requests and their discussions for a project with progress reporting.
|
/// Ingest all merge requests and their discussions for a project with progress reporting.
|
||||||
|
#[instrument(
|
||||||
|
skip(conn, client, config, progress),
|
||||||
|
fields(project_id, gitlab_project_id, items_processed, items_skipped, errors)
|
||||||
|
)]
|
||||||
pub async fn ingest_project_merge_requests_with_progress(
|
pub async fn ingest_project_merge_requests_with_progress(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
client: &GitLabClient,
|
client: &GitLabClient,
|
||||||
@@ -380,8 +404,15 @@ pub async fn ingest_project_merge_requests_with_progress(
|
|||||||
debug!(enqueued, "Enqueued resource events jobs for MRs");
|
debug!(enqueued, "Enqueued resource events jobs for MRs");
|
||||||
}
|
}
|
||||||
|
|
||||||
let drain_result =
|
let drain_result = drain_resource_events(
|
||||||
drain_resource_events(conn, client, config, gitlab_project_id, &progress).await?;
|
conn,
|
||||||
|
client,
|
||||||
|
config,
|
||||||
|
project_id,
|
||||||
|
gitlab_project_id,
|
||||||
|
&progress,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
result.resource_events_fetched = drain_result.fetched;
|
result.resource_events_fetched = drain_result.fetched;
|
||||||
result.resource_events_failed = drain_result.failed;
|
result.resource_events_failed = drain_result.failed;
|
||||||
}
|
}
|
||||||
@@ -400,6 +431,10 @@ pub async fn ingest_project_merge_requests_with_progress(
|
|||||||
"MR project ingestion complete"
|
"MR project ingestion complete"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
tracing::Span::current().record("items_processed", result.mrs_upserted);
|
||||||
|
tracing::Span::current().record("items_skipped", result.mrs_skipped_discussion_sync);
|
||||||
|
tracing::Span::current().record("errors", result.resource_events_failed);
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -455,6 +490,7 @@ async fn sync_mr_discussions_sequential(
|
|||||||
pub struct DrainResult {
|
pub struct DrainResult {
|
||||||
pub fetched: usize,
|
pub fetched: usize,
|
||||||
pub failed: usize,
|
pub failed: usize,
|
||||||
|
pub skipped_not_found: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Enqueue resource_events jobs for all entities of a given type in a project.
|
/// Enqueue resource_events jobs for all entities of a given type in a project.
|
||||||
@@ -466,21 +502,60 @@ fn enqueue_resource_events_for_entity_type(
|
|||||||
project_id: i64,
|
project_id: i64,
|
||||||
entity_type: &str,
|
entity_type: &str,
|
||||||
) -> Result<usize> {
|
) -> Result<usize> {
|
||||||
// Query all entities for this project and enqueue resource_events jobs.
|
// Clean up obsolete jobs: remove resource_events jobs for entities whose
|
||||||
// The UNIQUE constraint on pending_dependent_fetches makes this idempotent -
|
// watermark is already current (updated_at <= resource_events_synced_for_updated_at).
|
||||||
// already-queued entities are silently skipped via INSERT OR IGNORE.
|
// These are leftover from prior runs that failed after watermark-stamping but
|
||||||
|
// before job deletion, or from entities that no longer need syncing.
|
||||||
|
// We intentionally keep jobs for entities that still need syncing (including
|
||||||
|
// in-progress or failed-with-backoff jobs) to preserve retry state.
|
||||||
|
match entity_type {
|
||||||
|
"issue" => {
|
||||||
|
conn.execute(
|
||||||
|
"DELETE FROM pending_dependent_fetches \
|
||||||
|
WHERE project_id = ?1 AND entity_type = 'issue' AND job_type = 'resource_events' \
|
||||||
|
AND entity_local_id IN ( \
|
||||||
|
SELECT id FROM issues \
|
||||||
|
WHERE project_id = ?1 \
|
||||||
|
AND updated_at <= COALESCE(resource_events_synced_for_updated_at, 0) \
|
||||||
|
)",
|
||||||
|
[project_id],
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
"merge_request" => {
|
||||||
|
conn.execute(
|
||||||
|
"DELETE FROM pending_dependent_fetches \
|
||||||
|
WHERE project_id = ?1 AND entity_type = 'merge_request' AND job_type = 'resource_events' \
|
||||||
|
AND entity_local_id IN ( \
|
||||||
|
SELECT id FROM merge_requests \
|
||||||
|
WHERE project_id = ?1 \
|
||||||
|
AND updated_at <= COALESCE(resource_events_synced_for_updated_at, 0) \
|
||||||
|
)",
|
||||||
|
[project_id],
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueue resource_events jobs only for entities whose updated_at exceeds
|
||||||
|
// their last resource event sync watermark.
|
||||||
//
|
//
|
||||||
// Use separate hardcoded queries per entity type to avoid format!-based SQL.
|
// Use separate hardcoded queries per entity type to avoid format!-based SQL.
|
||||||
let entities: Vec<(i64, i64)> = match entity_type {
|
let entities: Vec<(i64, i64)> = match entity_type {
|
||||||
"issue" => {
|
"issue" => {
|
||||||
let mut stmt =
|
let mut stmt = conn.prepare_cached(
|
||||||
conn.prepare_cached("SELECT id, iid FROM issues WHERE project_id = ?1")?;
|
"SELECT id, iid FROM issues \
|
||||||
|
WHERE project_id = ?1 \
|
||||||
|
AND updated_at > COALESCE(resource_events_synced_for_updated_at, 0)",
|
||||||
|
)?;
|
||||||
stmt.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
|
stmt.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
|
||||||
.collect::<std::result::Result<Vec<_>, _>>()?
|
.collect::<std::result::Result<Vec<_>, _>>()?
|
||||||
}
|
}
|
||||||
"merge_request" => {
|
"merge_request" => {
|
||||||
let mut stmt =
|
let mut stmt = conn.prepare_cached(
|
||||||
conn.prepare_cached("SELECT id, iid FROM merge_requests WHERE project_id = ?1")?;
|
"SELECT id, iid FROM merge_requests \
|
||||||
|
WHERE project_id = ?1 \
|
||||||
|
AND updated_at > COALESCE(resource_events_synced_for_updated_at, 0)",
|
||||||
|
)?;
|
||||||
stmt.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
|
stmt.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
|
||||||
.collect::<std::result::Result<Vec<_>, _>>()?
|
.collect::<std::result::Result<Vec<_>, _>>()?
|
||||||
}
|
}
|
||||||
@@ -509,10 +584,15 @@ fn enqueue_resource_events_for_entity_type(
|
|||||||
///
|
///
|
||||||
/// Processes jobs sequentially since `rusqlite::Connection` is not `Send`.
|
/// Processes jobs sequentially since `rusqlite::Connection` is not `Send`.
|
||||||
/// Uses exponential backoff on failure via `fail_job`.
|
/// Uses exponential backoff on failure via `fail_job`.
|
||||||
|
#[instrument(
|
||||||
|
skip(conn, client, config, progress),
|
||||||
|
fields(project_id, gitlab_project_id, items_processed, errors)
|
||||||
|
)]
|
||||||
async fn drain_resource_events(
|
async fn drain_resource_events(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
client: &GitLabClient,
|
client: &GitLabClient,
|
||||||
config: &Config,
|
config: &Config,
|
||||||
|
project_id: i64,
|
||||||
gitlab_project_id: i64,
|
gitlab_project_id: i64,
|
||||||
progress: &Option<ProgressCallback>,
|
progress: &Option<ProgressCallback>,
|
||||||
) -> Result<DrainResult> {
|
) -> Result<DrainResult> {
|
||||||
@@ -525,9 +605,15 @@ async fn drain_resource_events(
|
|||||||
info!(reclaimed, "Reclaimed stale resource event locks");
|
info!(reclaimed, "Reclaimed stale resource event locks");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count total pending jobs for progress reporting
|
// Count only claimable jobs (unlocked, past retry backoff) for accurate progress.
|
||||||
let pending_counts = count_pending_jobs(conn)?;
|
// Using count_pending_jobs here would inflate the total with locked/backing-off
|
||||||
let total_pending = pending_counts.get("resource_events").copied().unwrap_or(0);
|
// jobs that can't be claimed in this drain run, causing the progress bar to
|
||||||
|
// never reach 100%.
|
||||||
|
let claimable_counts = count_claimable_jobs(conn, project_id)?;
|
||||||
|
let total_pending = claimable_counts
|
||||||
|
.get("resource_events")
|
||||||
|
.copied()
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
if total_pending == 0 {
|
if total_pending == 0 {
|
||||||
return Ok(result);
|
return Ok(result);
|
||||||
@@ -547,15 +633,19 @@ async fn drain_resource_events(
|
|||||||
let mut seen_job_ids = std::collections::HashSet::new();
|
let mut seen_job_ids = std::collections::HashSet::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let jobs = claim_jobs(conn, "resource_events", batch_size)?;
|
let jobs = claim_jobs(conn, "resource_events", project_id, batch_size)?;
|
||||||
if jobs.is_empty() {
|
if jobs.is_empty() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track whether any job in this batch was actually new. If every
|
||||||
|
// claimed job was already seen, break to avoid an infinite loop
|
||||||
|
// (can happen with clock skew or zero-backoff edge cases).
|
||||||
|
let mut any_new_in_batch = false;
|
||||||
|
|
||||||
for job in &jobs {
|
for job in &jobs {
|
||||||
// Guard against re-processing a job that was failed and re-claimed
|
// Guard against re-processing a job that was failed and re-claimed
|
||||||
// within the same drain run (shouldn't happen due to backoff, but
|
// within the same drain run.
|
||||||
// defensive against clock skew or zero-backoff edge cases).
|
|
||||||
if !seen_job_ids.insert(job.id) {
|
if !seen_job_ids.insert(job.id) {
|
||||||
warn!(
|
warn!(
|
||||||
job_id = job.id,
|
job_id = job.id,
|
||||||
@@ -563,6 +653,7 @@ async fn drain_resource_events(
|
|||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
any_new_in_batch = true;
|
||||||
|
|
||||||
match client
|
match client
|
||||||
.fetch_all_resource_events(gitlab_project_id, &job.entity_type, job.entity_iid)
|
.fetch_all_resource_events(gitlab_project_id, &job.entity_type, job.entity_iid)
|
||||||
@@ -582,6 +673,11 @@ async fn drain_resource_events(
|
|||||||
match store_result {
|
match store_result {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
complete_job(conn, job.id)?;
|
complete_job(conn, job.id)?;
|
||||||
|
update_resource_event_watermark(
|
||||||
|
conn,
|
||||||
|
&job.entity_type,
|
||||||
|
job.entity_local_id,
|
||||||
|
)?;
|
||||||
result.fetched += 1;
|
result.fetched += 1;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@@ -597,6 +693,25 @@ async fn drain_resource_events(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
// Only 404 (not found) is truly permanent -- the resource
|
||||||
|
// events endpoint doesn't exist for this entity. Stamp the
|
||||||
|
// watermark so we skip it next run. All other errors
|
||||||
|
// (403, auth, network) get backoff retry.
|
||||||
|
if e.is_permanent_api_error() {
|
||||||
|
debug!(
|
||||||
|
entity_type = %job.entity_type,
|
||||||
|
entity_iid = job.entity_iid,
|
||||||
|
error = %e,
|
||||||
|
"Permanent API error for resource events, marking complete"
|
||||||
|
);
|
||||||
|
complete_job(conn, job.id)?;
|
||||||
|
update_resource_event_watermark(
|
||||||
|
conn,
|
||||||
|
&job.entity_type,
|
||||||
|
job.entity_local_id,
|
||||||
|
)?;
|
||||||
|
result.skipped_not_found += 1;
|
||||||
|
} else {
|
||||||
warn!(
|
warn!(
|
||||||
entity_type = %job.entity_type,
|
entity_type = %job.entity_type,
|
||||||
entity_iid = job.entity_iid,
|
entity_iid = job.entity_iid,
|
||||||
@@ -607,6 +722,7 @@ async fn drain_resource_events(
|
|||||||
result.failed += 1;
|
result.failed += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
processed += 1;
|
processed += 1;
|
||||||
emit(ProgressEvent::ResourceEventFetched {
|
emit(ProgressEvent::ResourceEventFetched {
|
||||||
@@ -614,6 +730,12 @@ async fn drain_resource_events(
|
|||||||
total: total_pending,
|
total: total_pending,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If every job in this batch was already seen, stop to prevent spinning.
|
||||||
|
if !any_new_in_batch {
|
||||||
|
warn!("All claimed jobs were already processed, breaking drain loop");
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
emit(ProgressEvent::ResourceEventsFetchComplete {
|
emit(ProgressEvent::ResourceEventsFetchComplete {
|
||||||
@@ -629,6 +751,9 @@ async fn drain_resource_events(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tracing::Span::current().record("items_processed", result.fetched);
|
||||||
|
tracing::Span::current().record("errors", result.failed);
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -680,6 +805,33 @@ fn store_resource_events(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Update the resource event watermark for an entity after successful event fetch.
|
||||||
|
///
|
||||||
|
/// Sets `resource_events_synced_for_updated_at = updated_at` so the entity
|
||||||
|
/// won't be re-enqueued until its `updated_at` advances again.
|
||||||
|
fn update_resource_event_watermark(
|
||||||
|
conn: &Connection,
|
||||||
|
entity_type: &str,
|
||||||
|
entity_local_id: i64,
|
||||||
|
) -> Result<()> {
|
||||||
|
match entity_type {
|
||||||
|
"issue" => {
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE issues SET resource_events_synced_for_updated_at = updated_at WHERE id = ?",
|
||||||
|
[entity_local_id],
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
"merge_request" => {
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE merge_requests SET resource_events_synced_for_updated_at = updated_at WHERE id = ?",
|
||||||
|
[entity_local_id],
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -722,6 +874,7 @@ mod tests {
|
|||||||
let result = DrainResult::default();
|
let result = DrainResult::default();
|
||||||
assert_eq!(result.fetched, 0);
|
assert_eq!(result.fetched, 0);
|
||||||
assert_eq!(result.failed, 0);
|
assert_eq!(result.failed, 0);
|
||||||
|
assert_eq!(result.skipped_not_found, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user