feat(who): expand expert + overlap queries with mr_file_changes and mr_reviewers
Chain: bd-jec (config flag) -> bd-2yo (fetch MR diffs) -> bd-3qn6 (rewrite who queries) - Add fetch_mr_file_changes config option and --no-file-changes CLI flag - Add GitLab MR diffs API fetch pipeline with watermark-based sync - Create migration 020 for diffs_synced_for_updated_at watermark column - Rewrite query_expert() and query_overlap() to use 4-signal UNION ALL: DiffNote reviewers, DiffNote MR authors, file-change authors, file-change reviewers - Deduplicate across signal types via COUNT(DISTINCT CASE WHEN ... THEN mr_id END) - Add insert_file_change test helper, 8 new who tests, all 397 tests pass - Also includes: list performance migration 019, autocorrect module, README updates Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -42,6 +42,9 @@ pub enum ProgressEvent {
|
||||
ClosesIssuesFetchStarted { total: usize },
|
||||
ClosesIssueFetched { current: usize, total: usize },
|
||||
ClosesIssuesFetchComplete { fetched: usize, failed: usize },
|
||||
MrDiffsFetchStarted { total: usize },
|
||||
MrDiffFetched { current: usize, total: usize },
|
||||
MrDiffsFetchComplete { fetched: usize, failed: usize },
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
@@ -76,6 +79,8 @@ pub struct IngestMrProjectResult {
|
||||
pub resource_events_failed: usize,
|
||||
pub closes_issues_fetched: usize,
|
||||
pub closes_issues_failed: usize,
|
||||
pub mr_diffs_fetched: usize,
|
||||
pub mr_diffs_failed: usize,
|
||||
}
|
||||
|
||||
pub async fn ingest_project_issues(
|
||||
@@ -466,6 +471,31 @@ pub async fn ingest_project_merge_requests_with_progress(
|
||||
result.closes_issues_failed = closes_result.failed;
|
||||
}
|
||||
|
||||
if signal.is_cancelled() {
|
||||
info!("Shutdown requested, returning partial MR results");
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
if config.sync.fetch_mr_file_changes {
|
||||
let enqueued = enqueue_mr_diffs_jobs(conn, project_id)?;
|
||||
if enqueued > 0 {
|
||||
debug!(enqueued, "Enqueued mr_diffs jobs");
|
||||
}
|
||||
|
||||
let diffs_result = drain_mr_diffs(
|
||||
conn,
|
||||
client,
|
||||
config,
|
||||
project_id,
|
||||
gitlab_project_id,
|
||||
&progress,
|
||||
signal,
|
||||
)
|
||||
.await?;
|
||||
result.mr_diffs_fetched = diffs_result.fetched;
|
||||
result.mr_diffs_failed = diffs_result.failed;
|
||||
}
|
||||
|
||||
info!(
|
||||
mrs_fetched = result.mrs_fetched,
|
||||
mrs_upserted = result.mrs_upserted,
|
||||
@@ -479,6 +509,8 @@ pub async fn ingest_project_merge_requests_with_progress(
|
||||
resource_events_failed = result.resource_events_failed,
|
||||
closes_issues_fetched = result.closes_issues_fetched,
|
||||
closes_issues_failed = result.closes_issues_failed,
|
||||
mr_diffs_fetched = result.mr_diffs_fetched,
|
||||
mr_diffs_failed = result.mr_diffs_failed,
|
||||
"MR project ingestion complete"
|
||||
);
|
||||
|
||||
@@ -1188,6 +1220,235 @@ fn store_closes_issues_refs(
|
||||
}
|
||||
}
|
||||
|
||||
// ─── MR Diffs (file changes) ────────────────────────────────────────────────
|
||||
|
||||
fn enqueue_mr_diffs_jobs(conn: &Connection, project_id: i64) -> Result<usize> {
|
||||
// Remove stale jobs for MRs that haven't changed since their last diffs sync
|
||||
conn.execute(
|
||||
"DELETE FROM pending_dependent_fetches \
|
||||
WHERE project_id = ?1 AND entity_type = 'merge_request' AND job_type = 'mr_diffs' \
|
||||
AND entity_local_id IN ( \
|
||||
SELECT id FROM merge_requests \
|
||||
WHERE project_id = ?1 \
|
||||
AND updated_at <= COALESCE(diffs_synced_for_updated_at, 0) \
|
||||
)",
|
||||
[project_id],
|
||||
)?;
|
||||
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT id, iid FROM merge_requests \
|
||||
WHERE project_id = ?1 \
|
||||
AND updated_at > COALESCE(diffs_synced_for_updated_at, 0)",
|
||||
)?;
|
||||
let entities: Vec<(i64, i64)> = stmt
|
||||
.query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||
|
||||
let mut enqueued = 0;
|
||||
for (local_id, iid) in &entities {
|
||||
if enqueue_job(
|
||||
conn,
|
||||
project_id,
|
||||
"merge_request",
|
||||
*iid,
|
||||
*local_id,
|
||||
"mr_diffs",
|
||||
None,
|
||||
)? {
|
||||
enqueued += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(enqueued)
|
||||
}
|
||||
|
||||
struct PrefetchedMrDiffs {
|
||||
job_id: i64,
|
||||
entity_iid: i64,
|
||||
entity_local_id: i64,
|
||||
result:
|
||||
std::result::Result<Vec<crate::gitlab::types::GitLabMrDiff>, crate::core::error::LoreError>,
|
||||
}
|
||||
|
||||
async fn prefetch_mr_diffs(
|
||||
client: &GitLabClient,
|
||||
gitlab_project_id: i64,
|
||||
job_id: i64,
|
||||
entity_iid: i64,
|
||||
entity_local_id: i64,
|
||||
) -> PrefetchedMrDiffs {
|
||||
let result = client.fetch_mr_diffs(gitlab_project_id, entity_iid).await;
|
||||
PrefetchedMrDiffs {
|
||||
job_id,
|
||||
entity_iid,
|
||||
entity_local_id,
|
||||
result,
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(
|
||||
skip(conn, client, config, progress, signal),
|
||||
fields(project_id, gitlab_project_id, items_processed, errors)
|
||||
)]
|
||||
async fn drain_mr_diffs(
|
||||
conn: &Connection,
|
||||
client: &GitLabClient,
|
||||
config: &Config,
|
||||
project_id: i64,
|
||||
gitlab_project_id: i64,
|
||||
progress: &Option<ProgressCallback>,
|
||||
signal: &ShutdownSignal,
|
||||
) -> Result<DrainResult> {
|
||||
let mut result = DrainResult::default();
|
||||
let batch_size = config.sync.dependent_concurrency as usize;
|
||||
|
||||
let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
|
||||
if reclaimed > 0 {
|
||||
info!(reclaimed, "Reclaimed stale mr_diffs locks");
|
||||
}
|
||||
|
||||
let claimable_counts = count_claimable_jobs(conn, project_id)?;
|
||||
let total_pending = claimable_counts.get("mr_diffs").copied().unwrap_or(0);
|
||||
|
||||
if total_pending == 0 {
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
let emit = |event: ProgressEvent| {
|
||||
if let Some(cb) = progress {
|
||||
cb(event);
|
||||
}
|
||||
};
|
||||
|
||||
emit(ProgressEvent::MrDiffsFetchStarted {
|
||||
total: total_pending,
|
||||
});
|
||||
|
||||
let mut processed = 0;
|
||||
let mut seen_job_ids = std::collections::HashSet::new();
|
||||
|
||||
loop {
|
||||
if signal.is_cancelled() {
|
||||
info!("Shutdown requested during mr_diffs drain, returning partial results");
|
||||
break;
|
||||
}
|
||||
|
||||
let jobs = claim_jobs(conn, "mr_diffs", project_id, batch_size)?;
|
||||
if jobs.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// Phase 1: Concurrent HTTP fetches
|
||||
let futures: Vec<_> = jobs
|
||||
.iter()
|
||||
.filter(|j| seen_job_ids.insert(j.id))
|
||||
.map(|j| {
|
||||
prefetch_mr_diffs(
|
||||
client,
|
||||
gitlab_project_id,
|
||||
j.id,
|
||||
j.entity_iid,
|
||||
j.entity_local_id,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if futures.is_empty() {
|
||||
warn!("All claimed mr_diffs jobs were already processed, breaking drain loop");
|
||||
break;
|
||||
}
|
||||
|
||||
let prefetched = join_all(futures).await;
|
||||
|
||||
// Phase 2: Serial DB writes
|
||||
for p in prefetched {
|
||||
match p.result {
|
||||
Ok(diffs) => {
|
||||
let store_result = super::mr_diffs::upsert_mr_file_changes(
|
||||
conn,
|
||||
p.entity_local_id,
|
||||
project_id,
|
||||
&diffs,
|
||||
);
|
||||
|
||||
match store_result {
|
||||
Ok(_) => {
|
||||
let tx = conn.unchecked_transaction()?;
|
||||
complete_job_tx(&tx, p.job_id)?;
|
||||
update_diffs_watermark_tx(&tx, p.entity_local_id)?;
|
||||
tx.commit()?;
|
||||
result.fetched += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
entity_iid = p.entity_iid,
|
||||
error = %e,
|
||||
"Failed to store MR file changes"
|
||||
);
|
||||
fail_job(conn, p.job_id, &e.to_string())?;
|
||||
result.failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if e.is_permanent_api_error() {
|
||||
debug!(
|
||||
entity_iid = p.entity_iid,
|
||||
error = %e,
|
||||
"Permanent API error for mr_diffs, marking complete"
|
||||
);
|
||||
let tx = conn.unchecked_transaction()?;
|
||||
complete_job_tx(&tx, p.job_id)?;
|
||||
update_diffs_watermark_tx(&tx, p.entity_local_id)?;
|
||||
tx.commit()?;
|
||||
result.skipped_not_found += 1;
|
||||
} else {
|
||||
warn!(
|
||||
entity_iid = p.entity_iid,
|
||||
error = %e,
|
||||
"Failed to fetch MR diffs from GitLab"
|
||||
);
|
||||
fail_job(conn, p.job_id, &e.to_string())?;
|
||||
result.failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
processed += 1;
|
||||
emit(ProgressEvent::MrDiffFetched {
|
||||
current: processed,
|
||||
total: total_pending,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
emit(ProgressEvent::MrDiffsFetchComplete {
|
||||
fetched: result.fetched,
|
||||
failed: result.failed,
|
||||
});
|
||||
|
||||
if result.fetched > 0 || result.failed > 0 {
|
||||
info!(
|
||||
fetched = result.fetched,
|
||||
failed = result.failed,
|
||||
"mr_diffs drain complete"
|
||||
);
|
||||
}
|
||||
|
||||
tracing::Span::current().record("items_processed", result.fetched);
|
||||
tracing::Span::current().record("errors", result.failed);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn update_diffs_watermark_tx(tx: &rusqlite::Transaction<'_>, mr_local_id: i64) -> Result<()> {
|
||||
tx.execute(
|
||||
"UPDATE merge_requests SET diffs_synced_for_updated_at = updated_at WHERE id = ?",
|
||||
[mr_local_id],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
Reference in New Issue
Block a user