fix: Project-scoped job claiming, structured rate-limit logging, RRF total_cmp

Targeted fixes across multiple subsystems:

dependent_queue:
- Add project_id parameter to claim_jobs() for project-scoped job claiming,
  preventing cross-project job theft during concurrent multi-project ingestion
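- Add project_id parameter to count_pending_jobs() with optional scoping
  (None returns global counts, Some(pid) returns per-project counts)
- Add count_claimable_jobs(), which counts, per project, only the jobs that
  are unlocked and past their retry time, giving accurate progress-bar totals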

gitlab/client:
- Downgrade rate-limit log from warn to info (429s are expected operational
  behavior, not warnings) and add structured fields (path, status_code)
  for better log filtering and aggregation

gitlab/transformers/discussion:
- Add tracing::warn on invalid timestamp parse instead of silent fallback
  to epoch 0, making data quality issues visible in logs

ingestion/merge_requests:
- Remove duplicate doc comment on upsert_label_tx

search/rrf:
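- Replace partial_cmp().unwrap_or() with total_cmp() for f64 sorting,
  eliminating the NaN edge case entirely (total_cmp defines a total order
  over all f64 values, so NaN scores sort deterministically instead of
  hitting the unwrap_or() fallback)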

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-04 10:01:28 -05:00
parent f6d19a9467
commit 86a51cddef
6 changed files with 102 additions and 38 deletions

View File

@@ -54,11 +54,16 @@ pub fn enqueue_job(
Ok(changes > 0)
}
/// Claim a batch of jobs for processing.
/// Claim a batch of jobs for processing, scoped to a specific project.
///
/// Atomically selects and locks jobs within a transaction. Only claims jobs
/// where `locked_at IS NULL` and `(next_retry_at IS NULL OR next_retry_at <= now)`.
pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Result<Vec<PendingJob>> {
pub fn claim_jobs(
conn: &Connection,
job_type: &str,
project_id: i64,
batch_size: usize,
) -> Result<Vec<PendingJob>> {
if batch_size == 0 {
return Ok(Vec::new());
}
@@ -73,6 +78,7 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
WHERE id IN (
SELECT id FROM pending_dependent_fetches
WHERE job_type = ?2
AND project_id = ?4
AND locked_at IS NULL
AND (next_retry_at IS NULL OR next_retry_at <= ?1)
ORDER BY enqueued_at ASC
@@ -83,18 +89,21 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
)?;
let jobs = stmt
.query_map(rusqlite::params![now, job_type, batch_size as i64], |row| {
Ok(PendingJob {
id: row.get(0)?,
project_id: row.get(1)?,
entity_type: row.get(2)?,
entity_iid: row.get(3)?,
entity_local_id: row.get(4)?,
job_type: row.get(5)?,
payload_json: row.get(6)?,
attempts: row.get(7)?,
})
})?
.query_map(
rusqlite::params![now, job_type, batch_size as i64, project_id],
|row| {
Ok(PendingJob {
id: row.get(0)?,
project_id: row.get(1)?,
entity_type: row.get(2)?,
entity_iid: row.get(3)?,
entity_local_id: row.get(4)?,
job_type: row.get(5)?,
payload_json: row.get(6)?,
attempts: row.get(7)?,
})
},
)?
.collect::<std::result::Result<Vec<_>, _>>()?;
Ok(jobs)
@@ -152,19 +161,69 @@ pub fn reclaim_stale_locks(conn: &Connection, stale_threshold_minutes: u32) -> R
Ok(changes)
}
/// Count pending jobs by job_type (for stats/progress).
pub fn count_pending_jobs(conn: &Connection) -> Result<HashMap<String, usize>> {
let mut stmt = conn.prepare_cached(
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches GROUP BY job_type",
)?;
/// Count pending jobs by job_type, optionally scoped to a project.
pub fn count_pending_jobs(
conn: &Connection,
project_id: Option<i64>,
) -> Result<HashMap<String, usize>> {
let mut counts = HashMap::new();
let rows = stmt.query_map([], |row| {
match project_id {
Some(pid) => {
let mut stmt = conn.prepare_cached(
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches \
WHERE project_id = ?1 GROUP BY job_type",
)?;
let rows = stmt.query_map(rusqlite::params![pid], |row| {
let job_type: String = row.get(0)?;
let count: i64 = row.get(1)?;
Ok((job_type, count as usize))
})?;
for row in rows {
let (job_type, count) = row?;
counts.insert(job_type, count);
}
}
None => {
let mut stmt = conn.prepare_cached(
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches GROUP BY job_type",
)?;
let rows = stmt.query_map([], |row| {
let job_type: String = row.get(0)?;
let count: i64 = row.get(1)?;
Ok((job_type, count as usize))
})?;
for row in rows {
let (job_type, count) = row?;
counts.insert(job_type, count);
}
}
}
Ok(counts)
}
/// Count jobs that are actually claimable right now, by job_type.
///
/// Only counts jobs where `locked_at IS NULL` and `(next_retry_at IS NULL OR next_retry_at <= now)`,
/// matching the exact WHERE clause used by [`claim_jobs`]. This gives an accurate total
/// for progress bars — unlike [`count_pending_jobs`] which includes locked and backing-off jobs.
pub fn count_claimable_jobs(conn: &Connection, project_id: i64) -> Result<HashMap<String, usize>> {
let now = now_ms();
let mut counts = HashMap::new();
let mut stmt = conn.prepare_cached(
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches \
WHERE project_id = ?1 \
AND locked_at IS NULL \
AND (next_retry_at IS NULL OR next_retry_at <= ?2) \
GROUP BY job_type",
)?;
let rows = stmt.query_map(rusqlite::params![project_id, now], |row| {
let job_type: String = row.get(0)?;
let count: i64 = row.get(1)?;
Ok((job_type, count as usize))
})?;
for row in rows {
let (job_type, count) = row?;
counts.insert(job_type, count);