fix: Project-scoped job claiming, structured rate-limit logging, RRF total_cmp
Targeted fixes across multiple subsystems:

dependent_queue:
- Add project_id parameter to claim_jobs() for project-scoped job claiming, preventing cross-project job theft during concurrent multi-project ingestion
- Add project_id parameter to count_pending_jobs() with optional scoping (None returns global counts, Some(pid) returns per-project counts)

gitlab/client:
- Downgrade rate-limit log from warn to info (429s are expected operational behavior, not warnings) and add structured fields (path, status_code) for better log filtering and aggregation

gitlab/transformers/discussion:
- Add tracing::warn on invalid timestamp parse instead of a silent fallback to epoch 0, making data-quality issues visible in logs

ingestion/merge_requests:
- Remove duplicate doc comment on upsert_label_tx

search/rrf:
- Replace partial_cmp().unwrap_or() with total_cmp() for f64 sorting, eliminating the NaN edge case entirely (total_cmp orders NaN consistently)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -54,11 +54,16 @@ pub fn enqueue_job(
|
||||
Ok(changes > 0)
|
||||
}
|
||||
|
||||
/// Claim a batch of jobs for processing.
|
||||
/// Claim a batch of jobs for processing, scoped to a specific project.
|
||||
///
|
||||
/// Atomically selects and locks jobs within a transaction. Only claims jobs
|
||||
/// where `locked_at IS NULL` and `(next_retry_at IS NULL OR next_retry_at <= now)`.
|
||||
pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Result<Vec<PendingJob>> {
|
||||
pub fn claim_jobs(
|
||||
conn: &Connection,
|
||||
job_type: &str,
|
||||
project_id: i64,
|
||||
batch_size: usize,
|
||||
) -> Result<Vec<PendingJob>> {
|
||||
if batch_size == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
@@ -73,6 +78,7 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
|
||||
WHERE id IN (
|
||||
SELECT id FROM pending_dependent_fetches
|
||||
WHERE job_type = ?2
|
||||
AND project_id = ?4
|
||||
AND locked_at IS NULL
|
||||
AND (next_retry_at IS NULL OR next_retry_at <= ?1)
|
||||
ORDER BY enqueued_at ASC
|
||||
@@ -83,18 +89,21 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
|
||||
)?;
|
||||
|
||||
let jobs = stmt
|
||||
.query_map(rusqlite::params![now, job_type, batch_size as i64], |row| {
|
||||
Ok(PendingJob {
|
||||
id: row.get(0)?,
|
||||
project_id: row.get(1)?,
|
||||
entity_type: row.get(2)?,
|
||||
entity_iid: row.get(3)?,
|
||||
entity_local_id: row.get(4)?,
|
||||
job_type: row.get(5)?,
|
||||
payload_json: row.get(6)?,
|
||||
attempts: row.get(7)?,
|
||||
})
|
||||
})?
|
||||
.query_map(
|
||||
rusqlite::params![now, job_type, batch_size as i64, project_id],
|
||||
|row| {
|
||||
Ok(PendingJob {
|
||||
id: row.get(0)?,
|
||||
project_id: row.get(1)?,
|
||||
entity_type: row.get(2)?,
|
||||
entity_iid: row.get(3)?,
|
||||
entity_local_id: row.get(4)?,
|
||||
job_type: row.get(5)?,
|
||||
payload_json: row.get(6)?,
|
||||
attempts: row.get(7)?,
|
||||
})
|
||||
},
|
||||
)?
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||
|
||||
Ok(jobs)
|
||||
@@ -152,19 +161,69 @@ pub fn reclaim_stale_locks(conn: &Connection, stale_threshold_minutes: u32) -> R
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
/// Count pending jobs by job_type (for stats/progress).
|
||||
pub fn count_pending_jobs(conn: &Connection) -> Result<HashMap<String, usize>> {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches GROUP BY job_type",
|
||||
)?;
|
||||
|
||||
/// Count pending jobs by job_type, optionally scoped to a project.
|
||||
pub fn count_pending_jobs(
|
||||
conn: &Connection,
|
||||
project_id: Option<i64>,
|
||||
) -> Result<HashMap<String, usize>> {
|
||||
let mut counts = HashMap::new();
|
||||
let rows = stmt.query_map([], |row| {
|
||||
|
||||
match project_id {
|
||||
Some(pid) => {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches \
|
||||
WHERE project_id = ?1 GROUP BY job_type",
|
||||
)?;
|
||||
let rows = stmt.query_map(rusqlite::params![pid], |row| {
|
||||
let job_type: String = row.get(0)?;
|
||||
let count: i64 = row.get(1)?;
|
||||
Ok((job_type, count as usize))
|
||||
})?;
|
||||
for row in rows {
|
||||
let (job_type, count) = row?;
|
||||
counts.insert(job_type, count);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches GROUP BY job_type",
|
||||
)?;
|
||||
let rows = stmt.query_map([], |row| {
|
||||
let job_type: String = row.get(0)?;
|
||||
let count: i64 = row.get(1)?;
|
||||
Ok((job_type, count as usize))
|
||||
})?;
|
||||
for row in rows {
|
||||
let (job_type, count) = row?;
|
||||
counts.insert(job_type, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(counts)
|
||||
}
|
||||
|
||||
/// Count jobs that are actually claimable right now, by job_type.
|
||||
///
|
||||
/// Only counts jobs where `locked_at IS NULL` and `(next_retry_at IS NULL OR next_retry_at <= now)`,
|
||||
/// matching the exact WHERE clause used by [`claim_jobs`]. This gives an accurate total
|
||||
/// for progress bars — unlike [`count_pending_jobs`] which includes locked and backing-off jobs.
|
||||
pub fn count_claimable_jobs(conn: &Connection, project_id: i64) -> Result<HashMap<String, usize>> {
|
||||
let now = now_ms();
|
||||
let mut counts = HashMap::new();
|
||||
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT job_type, COUNT(*) FROM pending_dependent_fetches \
|
||||
WHERE project_id = ?1 \
|
||||
AND locked_at IS NULL \
|
||||
AND (next_retry_at IS NULL OR next_retry_at <= ?2) \
|
||||
GROUP BY job_type",
|
||||
)?;
|
||||
let rows = stmt.query_map(rusqlite::params![project_id, now], |row| {
|
||||
let job_type: String = row.get(0)?;
|
||||
let count: i64 = row.get(1)?;
|
||||
Ok((job_type, count as usize))
|
||||
})?;
|
||||
|
||||
for row in rows {
|
||||
let (job_type, count) = row?;
|
||||
counts.insert(job_type, count);
|
||||
|
||||
Reference in New Issue
Block a user